{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 15284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.270111183780249e-09, "logits/chosen": -2.634561777114868, "logits/rejected": -2.673060417175293, "logps/chosen": -207.5323944091797, "logps/rejected": -286.9266052246094, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.270111183780249e-08, "logits/chosen": -2.217697858810425, "logits/rejected": -1.9652551412582397, "logps/chosen": -186.06736755371094, "logps/rejected": -165.34738159179688, "loss": 0.6932, "rewards/accuracies": 0.1666666716337204, "rewards/chosen": -0.0014695884892717004, "rewards/margins": -0.002330251270905137, "rewards/rejected": 0.0008606627234257758, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.540222367560497e-08, "logits/chosen": -2.4319119453430176, "logits/rejected": -2.2228429317474365, "logps/chosen": -232.4527587890625, "logps/rejected": -231.435546875, "loss": 0.6931, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -4.6026078052818775e-05, "rewards/margins": 7.92687205830589e-05, "rewards/rejected": -0.0001252948131877929, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.810333551340746e-08, "logits/chosen": -2.25822377204895, "logits/rejected": -2.162461996078491, "logps/chosen": -197.378173828125, "logps/rejected": -219.074951171875, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.00045980390859767795, "rewards/margins": 0.00029530542087741196, "rewards/rejected": 0.0001644986041355878, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.3080444735120995e-07, "logits/chosen": -2.211453914642334, "logits/rejected": -2.251152992248535, "logps/chosen": -276.0306701660156, "logps/rejected": -265.74371337890625, "loss": 0.6931, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0004297545528970659, "rewards/margins": -0.0005864914273843169, "rewards/rejected": 0.00015673683083150536, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.6350555918901243e-07, "logits/chosen": -2.3490729331970215, "logits/rejected": -2.1418588161468506, "logps/chosen": -204.78414916992188, "logps/rejected": -184.72738647460938, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0002141618897439912, "rewards/margins": 0.0005374640459194779, "rewards/rejected": -0.0003233022871427238, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.9620667102681492e-07, "logits/chosen": -2.30709171295166, "logits/rejected": -2.0678863525390625, "logps/chosen": -209.7256622314453, "logps/rejected": -185.79867553710938, "loss": 0.6932, "rewards/accuracies": 0.375, "rewards/chosen": 0.0008024474373087287, "rewards/margins": -0.0010456187883391976, "rewards/rejected": 0.00184806645847857, "step": 60 }, { "epoch": 0.0, "learning_rate": 2.289077828646174e-07, "logits/chosen": -2.2695107460021973, "logits/rejected": -2.1568443775177, "logps/chosen": -218.00942993164062, "logps/rejected": -207.9250946044922, "loss": 0.6932, "rewards/accuracies": 0.375, "rewards/chosen": 0.00100115523673594, "rewards/margins": -0.00015484937466681004, "rewards/rejected": 0.00115600461140275, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.616088947024199e-07, "logits/chosen": -2.5082268714904785, "logits/rejected": -2.227625608444214, "logps/chosen": -258.78826904296875, "logps/rejected": -213.654541015625, "loss": 0.6931, "rewards/accuracies": 0.375, "rewards/chosen": 0.0016212640330195427, "rewards/margins": 0.00033446805900894105, "rewards/rejected": 0.001286796061322093, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.943100065402224e-07, "logits/chosen": -2.25887393951416, "logits/rejected": -2.173290252685547, "logps/chosen": -184.6951141357422, "logps/rejected": -165.476806640625, "loss": 0.6932, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.0008967015892267227, "rewards/margins": 0.00010184949496760964, "rewards/rejected": 0.0007948519778437912, "step": 90 }, { "epoch": 0.01, "learning_rate": 3.2701111837802487e-07, "logits/chosen": -2.4311330318450928, "logits/rejected": -2.425891399383545, "logps/chosen": -168.7532196044922, "logps/rejected": -183.79940795898438, "loss": 0.6932, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0018166687805205584, "rewards/margins": -0.00107972149271518, "rewards/rejected": 0.0028963901568204165, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -2.348848581314087, "eval_logits/rejected": -2.1603763103485107, "eval_logps/chosen": -231.76463317871094, "eval_logps/rejected": -211.4439239501953, "eval_loss": 0.6931134462356567, "eval_rewards/accuracies": 0.4950000047683716, "eval_rewards/chosen": 0.0024031461216509342, "eval_rewards/margins": 0.0007233246578834951, "eval_rewards/rejected": 0.0016798212891444564, "eval_runtime": 707.3142, "eval_samples_per_second": 2.828, "eval_steps_per_second": 1.414, "step": 100 }, { "epoch": 0.01, "learning_rate": 3.5971223021582736e-07, "logits/chosen": -2.347839593887329, "logits/rejected": -1.9999773502349854, "logps/chosen": -222.35336303710938, "logps/rejected": -166.99118041992188, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.004421192221343517, "rewards/margins": 0.0019870258402079344, "rewards/rejected": 0.002434166381135583, "step": 110 }, { "epoch": 0.01, "learning_rate": 3.9241334205362984e-07, "logits/chosen": -2.340026378631592, "logits/rejected": -2.244414806365967, "logps/chosen": -223.99569702148438, "logps/rejected": -234.1189422607422, "loss": 0.6931, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 0.00296420999802649, "rewards/margins": 0.0004412824346218258, "rewards/rejected": 0.002522927476093173, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.251144538914324e-07, "logits/chosen": -2.2618038654327393, "logits/rejected": -2.217468738555908, "logps/chosen": -149.3894500732422, "logps/rejected": -148.2598114013672, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.0025640667881816626, "rewards/margins": 0.00097900559194386, "rewards/rejected": 0.0015850610798224807, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.578155657292348e-07, "logits/chosen": -2.3222999572753906, "logits/rejected": -2.2233099937438965, "logps/chosen": -225.5967254638672, "logps/rejected": -159.4678955078125, "loss": 0.6931, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0036222212947905064, "rewards/margins": 0.0013900110498070717, "rewards/rejected": 0.0022322097793221474, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.905166775670374e-07, "logits/chosen": -2.367203950881958, "logits/rejected": -2.1586971282958984, "logps/chosen": -230.97109985351562, "logps/rejected": -229.11178588867188, "loss": 0.6929, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.003845545928925276, "rewards/margins": 0.003386072115972638, "rewards/rejected": 0.00045947395847178996, "step": 150 }, { "epoch": 0.01, "learning_rate": 5.232177894048398e-07, "logits/chosen": -2.2155299186706543, "logits/rejected": -2.2280077934265137, "logps/chosen": -260.2898864746094, "logps/rejected": -224.85397338867188, "loss": 0.693, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.005785978864878416, "rewards/margins": 0.004330903757363558, "rewards/rejected": 0.0014550751075148582, "step": 160 }, { "epoch": 0.01, "learning_rate": 5.559189012426422e-07, "logits/chosen": -2.3153624534606934, "logits/rejected": -2.0374207496643066, "logps/chosen": -180.40968322753906, "logps/rejected": -156.81607055664062, "loss": 0.6928, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004428991116583347, "rewards/margins": 0.0036846441216766834, "rewards/rejected": 0.0007443467038683593, "step": 170 }, { "epoch": 0.01, "learning_rate": 5.886200130804448e-07, "logits/chosen": -2.3993406295776367, "logits/rejected": -2.3392200469970703, "logps/chosen": -217.6866455078125, "logps/rejected": -198.7965850830078, "loss": 0.6929, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005419188644737005, "rewards/margins": 0.005340488161891699, "rewards/rejected": 7.869987894082442e-05, "step": 180 }, { "epoch": 0.01, "learning_rate": 6.213211249182473e-07, "logits/chosen": -2.0705583095550537, "logits/rejected": -2.175136089324951, "logps/chosen": -191.1099853515625, "logps/rejected": -208.73690795898438, "loss": 0.693, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.003862470854073763, "rewards/margins": 0.004704989958554506, "rewards/rejected": -0.0008425191044807434, "step": 190 }, { "epoch": 0.01, "learning_rate": 6.540222367560497e-07, "logits/chosen": -2.27732253074646, "logits/rejected": -2.24127197265625, "logps/chosen": -146.89163208007812, "logps/rejected": -177.7828826904297, "loss": 0.6927, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0032042518723756075, "rewards/margins": 0.0057475874200463295, "rewards/rejected": -0.0025433353148400784, "step": 200 }, { "epoch": 0.01, "eval_logits/chosen": -2.3492345809936523, "eval_logits/rejected": -2.1607820987701416, "eval_logps/chosen": -231.4797821044922, "eval_logps/rejected": -211.65264892578125, "eval_loss": 0.6928496360778809, "eval_rewards/accuracies": 0.5835000276565552, "eval_rewards/chosen": 0.005251556169241667, "eval_rewards/margins": 0.0056591029278934, "eval_rewards/rejected": -0.0004075466131325811, "eval_runtime": 708.7402, "eval_samples_per_second": 2.822, "eval_steps_per_second": 1.411, "step": 200 }, { "epoch": 0.01, "learning_rate": 6.867233485938523e-07, "logits/chosen": -2.4269886016845703, "logits/rejected": -2.2028229236602783, "logps/chosen": -218.64584350585938, "logps/rejected": -188.28201293945312, "loss": 0.6928, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.008876695297658443, "rewards/margins": 0.010037838481366634, "rewards/rejected": -0.0011611439986154437, "step": 210 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "logits/chosen": -2.2129268646240234, "logits/rejected": -2.0407798290252686, "logps/chosen": -182.85243225097656, "logps/rejected": -174.8861083984375, "loss": 0.693, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.004839606583118439, "rewards/margins": 0.004267896059900522, "rewards/rejected": 0.000571710173971951, "step": 220 }, { "epoch": 0.02, "learning_rate": 7.521255722694571e-07, "logits/chosen": -2.421215534210205, "logits/rejected": -2.025574207305908, "logps/chosen": -278.98101806640625, "logps/rejected": -184.08096313476562, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.007983444258570671, "rewards/margins": 0.005552899558097124, "rewards/rejected": 0.0024305458646267653, "step": 230 }, { "epoch": 0.02, "learning_rate": 7.848266841072597e-07, "logits/chosen": -2.199948310852051, "logits/rejected": -2.164917469024658, "logps/chosen": -214.61245727539062, "logps/rejected": -206.2373504638672, "loss": 0.6923, "rewards/accuracies": 0.625, "rewards/chosen": 0.01238412968814373, "rewards/margins": 0.01192299835383892, "rewards/rejected": 0.0004611331969499588, "step": 240 }, { "epoch": 0.02, "learning_rate": 8.175277959450622e-07, "logits/chosen": -2.1677653789520264, "logits/rejected": -2.3267643451690674, "logps/chosen": -218.1819610595703, "logps/rejected": -220.6886444091797, "loss": 0.6926, "rewards/accuracies": 0.625, "rewards/chosen": 0.022441856563091278, "rewards/margins": 0.009667301550507545, "rewards/rejected": 0.012774554081261158, "step": 250 }, { "epoch": 0.02, "learning_rate": 8.502289077828648e-07, "logits/chosen": -2.5052707195281982, "logits/rejected": -2.144476890563965, "logps/chosen": -254.23361206054688, "logps/rejected": -189.0504608154297, "loss": 0.6926, "rewards/accuracies": 0.625, "rewards/chosen": 0.02921391651034355, "rewards/margins": 0.011754143051803112, "rewards/rejected": 0.017459776252508163, "step": 260 }, { "epoch": 0.02, "learning_rate": 8.829300196206672e-07, "logits/chosen": -2.422799587249756, "logits/rejected": -2.1278483867645264, "logps/chosen": -246.3038330078125, "logps/rejected": -230.50228881835938, "loss": 0.6923, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02507014013826847, "rewards/margins": 0.014279340393841267, "rewards/rejected": 0.01079079881310463, "step": 270 }, { "epoch": 0.02, "learning_rate": 9.156311314584696e-07, "logits/chosen": -2.3067777156829834, "logits/rejected": -2.1929802894592285, "logps/chosen": -159.95680236816406, "logps/rejected": -146.39175415039062, "loss": 0.6928, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.022316502407193184, "rewards/margins": 0.009042134508490562, "rewards/rejected": 0.013274368830025196, "step": 280 }, { "epoch": 0.02, "learning_rate": 9.483322432962722e-07, "logits/chosen": -2.555990695953369, "logits/rejected": -2.1609978675842285, "logps/chosen": -282.0716857910156, "logps/rejected": -225.60147094726562, "loss": 0.693, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.031071314588189125, "rewards/margins": 0.010234272107481956, "rewards/rejected": 0.02083704061806202, "step": 290 }, { "epoch": 0.02, "learning_rate": 9.810333551340747e-07, "logits/chosen": -2.3414573669433594, "logits/rejected": -2.197089433670044, "logps/chosen": -265.0999755859375, "logps/rejected": -238.67355346679688, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": 0.035596780478954315, "rewards/margins": 0.022229164838790894, "rewards/rejected": 0.013367618434131145, "step": 300 }, { "epoch": 0.02, "eval_logits/chosen": -2.3535282611846924, "eval_logits/rejected": -2.164868116378784, "eval_logps/chosen": -228.6509246826172, "eval_logps/rejected": -209.84600830078125, "eval_loss": 0.6924605965614319, "eval_rewards/accuracies": 0.5830000042915344, "eval_rewards/chosen": 0.03354022651910782, "eval_rewards/margins": 0.015881428495049477, "eval_rewards/rejected": 0.017658798024058342, "eval_runtime": 705.9246, "eval_samples_per_second": 2.833, "eval_steps_per_second": 1.417, "step": 300 }, { "epoch": 0.02, "learning_rate": 1.0137344669718771e-06, "logits/chosen": -2.350961208343506, "logits/rejected": -2.365408420562744, "logps/chosen": -166.96469116210938, "logps/rejected": -155.90208435058594, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.025256266817450523, "rewards/margins": 0.005879827309399843, "rewards/rejected": 0.019376439973711967, "step": 310 }, { "epoch": 0.02, "learning_rate": 1.0464355788096796e-06, "logits/chosen": -2.455401659011841, "logits/rejected": -2.0602850914001465, "logps/chosen": -221.2332305908203, "logps/rejected": -192.01666259765625, "loss": 0.6924, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03931882977485657, "rewards/margins": 0.015343300998210907, "rewards/rejected": 0.02397553250193596, "step": 320 }, { "epoch": 0.02, "learning_rate": 1.079136690647482e-06, "logits/chosen": -2.4358632564544678, "logits/rejected": -2.2039878368377686, "logps/chosen": -203.50843811035156, "logps/rejected": -175.61509704589844, "loss": 0.6925, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04902844503521919, "rewards/margins": 0.022538715973496437, "rewards/rejected": 0.026489730924367905, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.1118378024852844e-06, "logits/chosen": -2.187950372695923, "logits/rejected": -2.353523015975952, "logps/chosen": -150.39413452148438, "logps/rejected": -177.75634765625, "loss": 0.6933, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.033202774822711945, "rewards/margins": -0.0008061464759521186, "rewards/rejected": 0.034008920192718506, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.144538914323087e-06, "logits/chosen": -2.426027774810791, "logits/rejected": -1.987079381942749, "logps/chosen": -317.85028076171875, "logps/rejected": -247.85317993164062, "loss": 0.692, "rewards/accuracies": 0.625, "rewards/chosen": 0.042603787034749985, "rewards/margins": 0.02849414013326168, "rewards/rejected": 0.014109638519585133, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.1772400261608895e-06, "logits/chosen": -2.4899606704711914, "logits/rejected": -2.1962363719940186, "logps/chosen": -220.11160278320312, "logps/rejected": -192.32504272460938, "loss": 0.6919, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.053684353828430176, "rewards/margins": 0.023779017850756645, "rewards/rejected": 0.02990533970296383, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.2099411379986922e-06, "logits/chosen": -2.1702442169189453, "logits/rejected": -2.2464897632598877, "logps/chosen": -192.40721130371094, "logps/rejected": -206.269287109375, "loss": 0.6929, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.05089518427848816, "rewards/margins": 0.010943805798888206, "rewards/rejected": 0.0399513766169548, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.2426422498364946e-06, "logits/chosen": -2.3307952880859375, "logits/rejected": -2.042811393737793, "logps/chosen": -216.83474731445312, "logps/rejected": -161.6975860595703, "loss": 0.6926, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.04813487082719803, "rewards/margins": 0.024402152746915817, "rewards/rejected": 0.02373271808028221, "step": 380 }, { "epoch": 0.03, "learning_rate": 1.2753433616742968e-06, "logits/chosen": -2.329103946685791, "logits/rejected": -2.249396562576294, "logps/chosen": -181.30165100097656, "logps/rejected": -244.35458374023438, "loss": 0.6919, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.039993561804294586, "rewards/margins": 0.023708099499344826, "rewards/rejected": 0.01628546044230461, "step": 390 }, { "epoch": 0.03, "learning_rate": 1.3080444735120995e-06, "logits/chosen": -2.490332841873169, "logits/rejected": -2.120997428894043, "logps/chosen": -220.3374481201172, "logps/rejected": -178.84103393554688, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.05155234411358833, "rewards/margins": 0.0375380739569664, "rewards/rejected": 0.014014266431331635, "step": 400 }, { "epoch": 0.03, "eval_logits/chosen": -2.354814052581787, "eval_logits/rejected": -2.1659581661224365, "eval_logps/chosen": -227.3407745361328, "eval_logps/rejected": -209.3865509033203, "eval_loss": 0.6920492649078369, "eval_rewards/accuracies": 0.6019999980926514, "eval_rewards/chosen": 0.04664193466305733, "eval_rewards/margins": 0.02438831515610218, "eval_rewards/rejected": 0.022253619506955147, "eval_runtime": 707.4391, "eval_samples_per_second": 2.827, "eval_steps_per_second": 1.414, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.3407455853499021e-06, "logits/chosen": -2.4750816822052, "logits/rejected": -2.324173927307129, "logps/chosen": -257.1285095214844, "logps/rejected": -223.27047729492188, "loss": 0.692, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.05224750190973282, "rewards/margins": 0.025383714586496353, "rewards/rejected": 0.026863792911171913, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.3734466971877046e-06, "logits/chosen": -2.3002800941467285, "logits/rejected": -2.1997017860412598, "logps/chosen": -177.15582275390625, "logps/rejected": -172.67115783691406, "loss": 0.6924, "rewards/accuracies": 0.625, "rewards/chosen": 0.04763823747634888, "rewards/margins": 0.029840771108865738, "rewards/rejected": 0.01779746450483799, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.406147809025507e-06, "logits/chosen": -2.2951817512512207, "logits/rejected": -2.1071887016296387, "logps/chosen": -209.26852416992188, "logps/rejected": -180.22879028320312, "loss": 0.691, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.05878716707229614, "rewards/margins": 0.03459661826491356, "rewards/rejected": 0.024190548807382584, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.4388489208633094e-06, "logits/chosen": -2.3935062885284424, "logits/rejected": -2.108060121536255, "logps/chosen": -249.3080596923828, "logps/rejected": -221.1660919189453, "loss": 0.6926, "rewards/accuracies": 0.5, "rewards/chosen": 0.05189204961061478, "rewards/margins": 0.017697608098387718, "rewards/rejected": 0.03419443964958191, "step": 440 }, { "epoch": 0.03, "learning_rate": 1.471550032701112e-06, "logits/chosen": -2.412630558013916, "logits/rejected": -2.2085201740264893, "logps/chosen": -184.7351531982422, "logps/rejected": -194.98269653320312, "loss": 0.6905, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.04912736266851425, "rewards/margins": 0.04446180909872055, "rewards/rejected": 0.004665557760745287, "step": 450 }, { "epoch": 0.03, "learning_rate": 1.5042511445389143e-06, "logits/chosen": -2.159377336502075, "logits/rejected": -2.205676555633545, "logps/chosen": -149.52809143066406, "logps/rejected": -215.054931640625, "loss": 0.6904, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.014119003899395466, "rewards/margins": 0.02100800909101963, "rewards/rejected": -0.006889003328979015, "step": 460 }, { "epoch": 0.03, "learning_rate": 1.536952256376717e-06, "logits/chosen": -2.049952983856201, "logits/rejected": -2.111029624938965, "logps/chosen": -199.77395629882812, "logps/rejected": -250.3234405517578, "loss": 0.6901, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.011566747911274433, "rewards/margins": 0.0498543456196785, "rewards/rejected": -0.03828759863972664, "step": 470 }, { "epoch": 0.03, "learning_rate": 1.5696533682145194e-06, "logits/chosen": -2.448256731033325, "logits/rejected": -2.215850353240967, "logps/chosen": -178.30746459960938, "logps/rejected": -151.38975524902344, "loss": 0.6909, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.05939297750592232, "rewards/margins": 0.04141029715538025, "rewards/rejected": 0.01798268035054207, "step": 480 }, { "epoch": 0.03, "learning_rate": 1.602354480052322e-06, "logits/chosen": -2.4165821075439453, "logits/rejected": -2.3280694484710693, "logps/chosen": -259.0218811035156, "logps/rejected": -207.7999267578125, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.07617911696434021, "rewards/margins": 0.03729747608304024, "rewards/rejected": 0.03888164460659027, "step": 490 }, { "epoch": 0.03, "learning_rate": 1.6350555918901245e-06, "logits/chosen": -2.21075701713562, "logits/rejected": -1.8819067478179932, "logps/chosen": -213.0443572998047, "logps/rejected": -203.5986328125, "loss": 0.6917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.05024771764874458, "rewards/margins": 0.0509779155254364, "rewards/rejected": -0.0007301971199922264, "step": 500 }, { "epoch": 0.03, "eval_logits/chosen": -2.349929094314575, "eval_logits/rejected": -2.1615867614746094, "eval_logps/chosen": -225.62718200683594, "eval_logps/rejected": -209.42613220214844, "eval_loss": 0.6916049718856812, "eval_rewards/accuracies": 0.6060000061988831, "eval_rewards/chosen": 0.06377778202295303, "eval_rewards/margins": 0.04191993921995163, "eval_rewards/rejected": 0.021857835352420807, "eval_runtime": 704.9072, "eval_samples_per_second": 2.837, "eval_steps_per_second": 1.419, "step": 500 }, { "epoch": 0.03, "learning_rate": 1.6677567037279269e-06, "logits/chosen": -2.4886791706085205, "logits/rejected": -2.138219118118286, "logps/chosen": -287.70098876953125, "logps/rejected": -252.02603149414062, "loss": 0.6929, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.085626520216465, "rewards/margins": 0.04710019752383232, "rewards/rejected": 0.038526326417922974, "step": 510 }, { "epoch": 0.03, "learning_rate": 1.7004578155657295e-06, "logits/chosen": -2.2930407524108887, "logits/rejected": -2.316594362258911, "logps/chosen": -199.58074951171875, "logps/rejected": -185.34860229492188, "loss": 0.6915, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0713859498500824, "rewards/margins": 0.061885036528110504, "rewards/rejected": 0.009500918909907341, "step": 520 }, { "epoch": 0.03, "learning_rate": 1.7331589274035318e-06, "logits/chosen": -2.1680140495300293, "logits/rejected": -2.0505690574645996, "logps/chosen": -180.614990234375, "logps/rejected": -186.27236938476562, "loss": 0.692, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0331156924366951, "rewards/margins": 0.06211583688855171, "rewards/rejected": -0.029000144451856613, "step": 530 }, { "epoch": 0.04, "learning_rate": 1.7658600392413344e-06, "logits/chosen": -2.396062135696411, "logits/rejected": -2.2367682456970215, "logps/chosen": -199.58575439453125, "logps/rejected": -187.2077178955078, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.032696597278118134, "rewards/margins": 0.049915581941604614, "rewards/rejected": -0.017218980938196182, "step": 540 }, { "epoch": 0.04, "learning_rate": 1.7985611510791368e-06, "logits/chosen": -2.396876811981201, "logits/rejected": -1.942486047744751, "logps/chosen": -256.23150634765625, "logps/rejected": -247.9710235595703, "loss": 0.692, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0731818825006485, "rewards/margins": 0.08128007501363754, "rewards/rejected": -0.008098193444311619, "step": 550 }, { "epoch": 0.04, "learning_rate": 1.8312622629169393e-06, "logits/chosen": -2.3080027103424072, "logits/rejected": -2.1690850257873535, "logps/chosen": -257.26641845703125, "logps/rejected": -226.06912231445312, "loss": 0.6917, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.06187974289059639, "rewards/margins": 0.03323299437761307, "rewards/rejected": 0.028646748512983322, "step": 560 }, { "epoch": 0.04, "learning_rate": 1.8639633747547417e-06, "logits/chosen": -2.3971519470214844, "logits/rejected": -2.228024482727051, "logps/chosen": -224.38430786132812, "logps/rejected": -191.48605346679688, "loss": 0.6914, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.04760807007551193, "rewards/margins": 0.027385840192437172, "rewards/rejected": 0.02022222802042961, "step": 570 }, { "epoch": 0.04, "learning_rate": 1.8966644865925443e-06, "logits/chosen": -2.246232748031616, "logits/rejected": -2.271991729736328, "logps/chosen": -234.4510498046875, "logps/rejected": -250.18887329101562, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.04757726192474365, "rewards/margins": 0.043433815240859985, "rewards/rejected": 0.0041434429585933685, "step": 580 }, { "epoch": 0.04, "learning_rate": 1.9293655984303466e-06, "logits/chosen": -2.657580852508545, "logits/rejected": -2.2350101470947266, "logps/chosen": -282.17510986328125, "logps/rejected": -204.83383178710938, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.007911397144198418, "rewards/margins": 0.056634120643138885, "rewards/rejected": -0.04872272536158562, "step": 590 }, { "epoch": 0.04, "learning_rate": 1.9620667102681494e-06, "logits/chosen": -2.393247127532959, "logits/rejected": -2.4066872596740723, "logps/chosen": -187.5821533203125, "logps/rejected": -175.9160614013672, "loss": 0.6919, "rewards/accuracies": 0.375, "rewards/chosen": 0.016703059896826744, "rewards/margins": 0.026545118540525436, "rewards/rejected": -0.009842058643698692, "step": 600 }, { "epoch": 0.04, "eval_logits/chosen": -2.3568413257598877, "eval_logits/rejected": -2.1674532890319824, "eval_logps/chosen": -227.02455139160156, "eval_logps/rejected": -211.3561248779297, "eval_loss": 0.6913270354270935, "eval_rewards/accuracies": 0.597000002861023, "eval_rewards/chosen": 0.04980393126606941, "eval_rewards/margins": 0.04724626615643501, "eval_rewards/rejected": 0.00255767023190856, "eval_runtime": 708.4382, "eval_samples_per_second": 2.823, "eval_steps_per_second": 1.412, "step": 600 }, { "epoch": 0.04, "learning_rate": 1.994767822105952e-06, "logits/chosen": -2.306018829345703, "logits/rejected": -2.208861827850342, "logps/chosen": -171.64891052246094, "logps/rejected": -183.595947265625, "loss": 0.6893, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.03554733842611313, "rewards/margins": 0.055120062083005905, "rewards/rejected": -0.019572719931602478, "step": 610 }, { "epoch": 0.04, "learning_rate": 2.0274689339437543e-06, "logits/chosen": -2.233059883117676, "logits/rejected": -2.0187458992004395, "logps/chosen": -270.9899597167969, "logps/rejected": -238.4331817626953, "loss": 0.6921, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.07699786126613617, "rewards/margins": 0.027974560856819153, "rewards/rejected": 0.04902329668402672, "step": 620 }, { "epoch": 0.04, "learning_rate": 2.0601700457815567e-06, "logits/chosen": -2.382429361343384, "logits/rejected": -2.015047788619995, "logps/chosen": -253.5185089111328, "logps/rejected": -211.54025268554688, "loss": 0.6927, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.05836200714111328, "rewards/margins": 0.015349363908171654, "rewards/rejected": 0.04301264509558678, "step": 630 }, { "epoch": 0.04, "learning_rate": 2.092871157619359e-06, "logits/chosen": -2.4374704360961914, "logits/rejected": -2.250887393951416, "logps/chosen": -169.18408203125, "logps/rejected": -187.0492706298828, "loss": 0.6917, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.07478086650371552, "rewards/margins": 0.041536975651979446, "rewards/rejected": 0.03324388712644577, "step": 640 }, { "epoch": 0.04, "learning_rate": 2.1255722694571616e-06, "logits/chosen": -2.44903302192688, "logits/rejected": -2.05329966545105, "logps/chosen": -271.42828369140625, "logps/rejected": -182.59605407714844, "loss": 0.6922, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.07466375082731247, "rewards/margins": 0.041353899985551834, "rewards/rejected": 0.033309854567050934, "step": 650 }, { "epoch": 0.04, "learning_rate": 2.158273381294964e-06, "logits/chosen": -2.3471286296844482, "logits/rejected": -2.2432503700256348, "logps/chosen": -207.7042236328125, "logps/rejected": -214.0726776123047, "loss": 0.6938, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.04722776263952255, "rewards/margins": 0.008569743484258652, "rewards/rejected": 0.0386580154299736, "step": 660 }, { "epoch": 0.04, "learning_rate": 2.190974493132767e-06, "logits/chosen": -2.3504185676574707, "logits/rejected": -2.0488924980163574, "logps/chosen": -233.1539306640625, "logps/rejected": -181.9641571044922, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": 0.04525148868560791, "rewards/margins": 0.05067021772265434, "rewards/rejected": -0.005418726243078709, "step": 670 }, { "epoch": 0.04, "learning_rate": 2.223675604970569e-06, "logits/chosen": -2.4212958812713623, "logits/rejected": -2.2168402671813965, "logps/chosen": -204.07693481445312, "logps/rejected": -190.05075073242188, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.06657350063323975, "rewards/margins": 0.033041100949048996, "rewards/rejected": 0.03353239223361015, "step": 680 }, { "epoch": 0.05, "learning_rate": 2.2563767168083718e-06, "logits/chosen": -2.495441436767578, "logits/rejected": -2.002847194671631, "logps/chosen": -250.1965789794922, "logps/rejected": -191.36761474609375, "loss": 0.6911, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.07055240124464035, "rewards/margins": 0.04203369468450546, "rewards/rejected": 0.028518706560134888, "step": 690 }, { "epoch": 0.05, "learning_rate": 2.289077828646174e-06, "logits/chosen": -2.32529354095459, "logits/rejected": -2.090954542160034, "logps/chosen": -234.6331329345703, "logps/rejected": -214.04269409179688, "loss": 0.6909, "rewards/accuracies": 0.5, "rewards/chosen": 0.04427819699048996, "rewards/margins": 0.04115144535899162, "rewards/rejected": 0.0031267497688531876, "step": 700 }, { "epoch": 0.05, "eval_logits/chosen": -2.350055456161499, "eval_logits/rejected": -2.161485433578491, "eval_logps/chosen": -226.39279174804688, "eval_logps/rejected": -210.55441284179688, "eval_loss": 0.6913213133811951, "eval_rewards/accuracies": 0.6144999861717224, "eval_rewards/chosen": 0.056121550500392914, "eval_rewards/margins": 0.04554666578769684, "eval_rewards/rejected": 0.010574882850050926, "eval_runtime": 706.9392, "eval_samples_per_second": 2.829, "eval_steps_per_second": 1.415, "step": 700 }, { "epoch": 0.05, "learning_rate": 2.3217789404839766e-06, "logits/chosen": -2.2059988975524902, "logits/rejected": -2.29160737991333, "logps/chosen": -154.5279541015625, "logps/rejected": -204.50648498535156, "loss": 0.691, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.023031946271657944, "rewards/margins": 0.03035845421254635, "rewards/rejected": -0.007326505146920681, "step": 710 }, { "epoch": 0.05, "learning_rate": 2.354480052321779e-06, "logits/chosen": -2.52923321723938, "logits/rejected": -2.1005005836486816, "logps/chosen": -253.52212524414062, "logps/rejected": -201.50367736816406, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": 0.048673033714294434, "rewards/margins": 0.0762287974357605, "rewards/rejected": -0.027555758133530617, "step": 720 }, { "epoch": 0.05, "learning_rate": 2.3871811641595815e-06, "logits/chosen": -2.3575785160064697, "logits/rejected": -2.1780619621276855, "logps/chosen": -252.4840087890625, "logps/rejected": -198.02867126464844, "loss": 0.6921, "rewards/accuracies": 0.625, "rewards/chosen": -0.056915633380413055, "rewards/margins": 0.042005524039268494, "rewards/rejected": -0.09892116487026215, "step": 730 }, { "epoch": 0.05, "learning_rate": 2.4198822759973843e-06, "logits/chosen": -2.1879830360412598, "logits/rejected": -2.2039308547973633, "logps/chosen": -209.80526733398438, "logps/rejected": -226.54446411132812, "loss": 0.6909, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19199207425117493, "rewards/margins": 0.028066366910934448, "rewards/rejected": -0.220058411359787, "step": 740 }, { "epoch": 0.05, "learning_rate": 2.4525833878351864e-06, "logits/chosen": -2.4402458667755127, "logits/rejected": -2.2395517826080322, "logps/chosen": -271.01220703125, "logps/rejected": -208.87350463867188, "loss": 0.6917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18211308121681213, "rewards/margins": 0.04141292721033096, "rewards/rejected": -0.2235260307788849, "step": 750 }, { "epoch": 0.05, "learning_rate": 2.4852844996729892e-06, "logits/chosen": -2.1985716819763184, "logits/rejected": -2.10333514213562, "logps/chosen": -263.1232604980469, "logps/rejected": -269.29693603515625, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23998942971229553, "rewards/margins": 0.04715558513998985, "rewards/rejected": -0.2871449887752533, "step": 760 }, { "epoch": 0.05, "learning_rate": 2.5179856115107916e-06, "logits/chosen": -2.3487696647644043, "logits/rejected": -2.030247211456299, "logps/chosen": -293.29290771484375, "logps/rejected": -244.9794158935547, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.1979665905237198, "rewards/margins": 0.056348543614149094, "rewards/rejected": -0.2543151080608368, "step": 770 }, { "epoch": 0.05, "learning_rate": 2.5506867233485937e-06, "logits/chosen": -2.4455208778381348, "logits/rejected": -2.0224125385284424, "logps/chosen": -275.57421875, "logps/rejected": -228.21737670898438, "loss": 0.6901, "rewards/accuracies": 0.75, "rewards/chosen": -0.09878290444612503, "rewards/margins": 0.10141804069280624, "rewards/rejected": -0.20020096004009247, "step": 780 }, { "epoch": 0.05, "learning_rate": 2.5833878351863965e-06, "logits/chosen": -2.4715168476104736, "logits/rejected": -2.403022050857544, "logps/chosen": -257.7041931152344, "logps/rejected": -262.29986572265625, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05706269294023514, "rewards/margins": 0.07216020673513412, "rewards/rejected": -0.12922289967536926, "step": 790 }, { "epoch": 0.05, "learning_rate": 2.616088947024199e-06, "logits/chosen": -2.155651092529297, "logits/rejected": -2.314833164215088, "logps/chosen": -206.03012084960938, "logps/rejected": -235.5647430419922, "loss": 0.6913, "rewards/accuracies": 0.5, "rewards/chosen": -0.09540672600269318, "rewards/margins": 0.050082337111234665, "rewards/rejected": -0.14548906683921814, "step": 800 }, { "epoch": 0.05, "eval_logits/chosen": -2.330695390701294, "eval_logits/rejected": -2.142754316329956, "eval_logps/chosen": -242.4707794189453, "eval_logps/rejected": -227.2015838623047, "eval_loss": 0.6913210153579712, "eval_rewards/accuracies": 0.597000002861023, "eval_rewards/chosen": -0.1046583503484726, "eval_rewards/margins": 0.051238518208265305, "eval_rewards/rejected": -0.155896857380867, "eval_runtime": 705.7692, "eval_samples_per_second": 2.834, "eval_steps_per_second": 1.417, "step": 800 }, { "epoch": 0.05, "learning_rate": 2.6487900588620014e-06, "logits/chosen": -2.1080448627471924, "logits/rejected": -1.8789002895355225, "logps/chosen": -209.4484405517578, "logps/rejected": -169.5839080810547, "loss": 0.6932, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09332854300737381, "rewards/margins": 0.02387315407395363, "rewards/rejected": -0.11720170080661774, "step": 810 }, { "epoch": 0.05, "learning_rate": 2.6814911706998042e-06, "logits/chosen": -2.293308734893799, "logits/rejected": -2.1208784580230713, "logps/chosen": -232.0146484375, "logps/rejected": -204.10064697265625, "loss": 0.6928, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06675419211387634, "rewards/margins": 0.028399985283613205, "rewards/rejected": -0.09515418112277985, "step": 820 }, { "epoch": 0.05, "learning_rate": 2.7141922825376067e-06, "logits/chosen": -2.1897878646850586, "logits/rejected": -2.195380210876465, "logps/chosen": -264.03271484375, "logps/rejected": -280.67877197265625, "loss": 0.6917, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06108593940734863, "rewards/margins": 0.06715109199285507, "rewards/rejected": -0.1282370388507843, "step": 830 }, { "epoch": 0.05, "learning_rate": 2.746893394375409e-06, "logits/chosen": -2.3309171199798584, "logits/rejected": -2.2094645500183105, "logps/chosen": -239.76480102539062, "logps/rejected": -241.4962921142578, "loss": 0.6918, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.053703807294368744, "rewards/margins": 0.03512780740857124, "rewards/rejected": -0.08883161842823029, "step": 840 }, { "epoch": 0.06, "learning_rate": 2.779594506213211e-06, "logits/chosen": -2.236567258834839, "logits/rejected": -2.002687692642212, "logps/chosen": -194.21266174316406, "logps/rejected": -198.6126708984375, "loss": 0.6918, "rewards/accuracies": 0.5, "rewards/chosen": -0.04353756457567215, "rewards/margins": 0.047133009880781174, "rewards/rejected": -0.09067057073116302, "step": 850 }, { "epoch": 0.06, "learning_rate": 2.812295618051014e-06, "logits/chosen": -2.4146676063537598, "logits/rejected": -2.2537002563476562, "logps/chosen": -283.446533203125, "logps/rejected": -226.4826202392578, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.024853792041540146, "rewards/margins": 0.04763239994645119, "rewards/rejected": -0.07248619943857193, "step": 860 }, { "epoch": 0.06, "learning_rate": 2.8449967298888164e-06, "logits/chosen": -2.299940347671509, "logits/rejected": -2.095834732055664, "logps/chosen": -180.71926879882812, "logps/rejected": -155.87872314453125, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03895248472690582, "rewards/margins": 0.03217850998044014, "rewards/rejected": -0.07113099843263626, "step": 870 }, { "epoch": 0.06, "learning_rate": 2.877697841726619e-06, "logits/chosen": -2.38145112991333, "logits/rejected": -2.282527208328247, "logps/chosen": -221.8948211669922, "logps/rejected": -208.37954711914062, "loss": 0.6917, "rewards/accuracies": 0.5, "rewards/chosen": -0.03059009648859501, "rewards/margins": 0.03299534320831299, "rewards/rejected": -0.06358544528484344, "step": 880 }, { "epoch": 0.06, "learning_rate": 2.9103989535644217e-06, "logits/chosen": -2.3034727573394775, "logits/rejected": -2.4145941734313965, "logps/chosen": -219.39413452148438, "logps/rejected": -252.17977905273438, "loss": 0.6925, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03910509869456291, "rewards/margins": 0.0225521232932806, "rewards/rejected": -0.06165723130106926, "step": 890 }, { "epoch": 0.06, "learning_rate": 2.943100065402224e-06, "logits/chosen": -2.3486392498016357, "logits/rejected": -2.202688455581665, "logps/chosen": -297.0926208496094, "logps/rejected": -285.2032470703125, "loss": 0.6921, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04449423402547836, "rewards/margins": 0.03427337482571602, "rewards/rejected": -0.07876761257648468, "step": 900 }, { "epoch": 0.06, "eval_logits/chosen": -2.3343303203582764, "eval_logits/rejected": -2.146639347076416, "eval_logps/chosen": -237.26773071289062, "eval_logps/rejected": -221.73362731933594, "eval_loss": 0.6909335255622864, "eval_rewards/accuracies": 0.6060000061988831, "eval_rewards/chosen": -0.05262775719165802, "eval_rewards/margins": 0.04858950152993202, "eval_rewards/rejected": -0.10121726244688034, "eval_runtime": 708.5552, "eval_samples_per_second": 2.823, "eval_steps_per_second": 1.411, "step": 900 }, { "epoch": 0.06, "learning_rate": 2.9758011772400266e-06, "logits/chosen": -2.2568647861480713, "logits/rejected": -2.2534162998199463, "logps/chosen": -263.49017333984375, "logps/rejected": -273.72900390625, "loss": 0.6923, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06180988624691963, "rewards/margins": 0.028642665594816208, "rewards/rejected": -0.09045255184173584, "step": 910 }, { "epoch": 0.06, "learning_rate": 3.0085022890778286e-06, "logits/chosen": -2.3166985511779785, "logits/rejected": -2.0645124912261963, "logps/chosen": -181.6991424560547, "logps/rejected": -157.57730102539062, "loss": 0.6918, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05959589406847954, "rewards/margins": 0.02801639400422573, "rewards/rejected": -0.08761228621006012, "step": 920 }, { "epoch": 0.06, "learning_rate": 3.0412034009156314e-06, "logits/chosen": -2.258695363998413, "logits/rejected": -2.4108707904815674, "logps/chosen": -240.5486297607422, "logps/rejected": -240.31802368164062, "loss": 0.6922, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04862401634454727, "rewards/margins": 0.040310461074113846, "rewards/rejected": -0.08893446624279022, "step": 930 }, { "epoch": 0.06, "learning_rate": 3.073904512753434e-06, "logits/chosen": -2.4486923217773438, "logits/rejected": -2.0814051628112793, "logps/chosen": -231.5814208984375, "logps/rejected": -202.97706604003906, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.01932818815112114, "rewards/margins": 0.06323892623186111, "rewards/rejected": -0.08256711810827255, "step": 940 }, { "epoch": 0.06, "learning_rate": 3.1066056245912363e-06, "logits/chosen": -2.308929920196533, "logits/rejected": -2.383852958679199, "logps/chosen": -233.48721313476562, "logps/rejected": -207.38430786132812, "loss": 0.6911, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.025032073259353638, "rewards/margins": 0.05856107547879219, "rewards/rejected": -0.08359314501285553, "step": 950 }, { "epoch": 0.06, "learning_rate": 3.1393067364290387e-06, "logits/chosen": -2.3422508239746094, "logits/rejected": -2.1356518268585205, "logps/chosen": -227.9677734375, "logps/rejected": -198.36422729492188, "loss": 0.6908, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.028410235419869423, "rewards/margins": 0.08982036262750626, "rewards/rejected": -0.11823059618473053, "step": 960 }, { "epoch": 0.06, "learning_rate": 3.1720078482668416e-06, "logits/chosen": -2.2739663124084473, "logits/rejected": -2.042900562286377, "logps/chosen": -214.7518310546875, "logps/rejected": -176.07008361816406, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.031811777502298355, "rewards/margins": 0.08425115048885345, "rewards/rejected": -0.11606292426586151, "step": 970 }, { "epoch": 0.06, "learning_rate": 3.204708960104644e-06, "logits/chosen": -2.3709425926208496, "logits/rejected": -2.0238471031188965, "logps/chosen": -218.3297119140625, "logps/rejected": -194.80873107910156, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": -0.009269696660339832, "rewards/margins": 0.09066729247570038, "rewards/rejected": -0.09993697702884674, "step": 980 }, { "epoch": 0.06, "learning_rate": 3.237410071942446e-06, "logits/chosen": -2.4116098880767822, "logits/rejected": -2.1458096504211426, "logps/chosen": -208.81069946289062, "logps/rejected": -177.1934051513672, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.024496430531144142, "rewards/margins": 0.01946501061320305, "rewards/rejected": -0.04396144300699234, "step": 990 }, { "epoch": 0.07, "learning_rate": 3.270111183780249e-06, "logits/chosen": -2.3320465087890625, "logits/rejected": -2.106236219406128, "logps/chosen": -201.52783203125, "logps/rejected": -188.52328491210938, "loss": 0.6903, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.015042750164866447, "rewards/margins": 0.04998449236154556, "rewards/rejected": -0.06502724438905716, "step": 1000 }, { "epoch": 0.07, "eval_logits/chosen": -2.3452556133270264, "eval_logits/rejected": -2.157487154006958, "eval_logps/chosen": -232.08251953125, "eval_logps/rejected": -217.237060546875, "eval_loss": 0.6907655000686646, "eval_rewards/accuracies": 0.6184999942779541, "eval_rewards/chosen": -0.0007757164421491325, "eval_rewards/margins": 0.05547565594315529, "eval_rewards/rejected": -0.05625137314200401, "eval_runtime": 709.7867, "eval_samples_per_second": 2.818, "eval_steps_per_second": 1.409, "step": 1000 }, { "epoch": 0.07, "learning_rate": 3.3028122956180513e-06, "logits/chosen": -2.202904462814331, "logits/rejected": -2.257141351699829, "logps/chosen": -211.8687744140625, "logps/rejected": -244.3396759033203, "loss": 0.6906, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0022913392167538404, "rewards/margins": 0.05001994967460632, "rewards/rejected": -0.047728605568408966, "step": 1010 }, { "epoch": 0.07, "learning_rate": 3.3355134074558538e-06, "logits/chosen": -2.197819948196411, "logits/rejected": -2.1015243530273438, "logps/chosen": -222.99203491210938, "logps/rejected": -202.92996215820312, "loss": 0.6929, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.022592799738049507, "rewards/margins": 0.0552542582154274, "rewards/rejected": -0.07784706354141235, "step": 1020 }, { "epoch": 0.07, "learning_rate": 3.368214519293656e-06, "logits/chosen": -2.1490020751953125, "logits/rejected": -1.9399007558822632, "logps/chosen": -200.19967651367188, "logps/rejected": -184.1601104736328, "loss": 0.6919, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.012293432839214802, "rewards/margins": 0.04858936741948128, "rewards/rejected": -0.060882795602083206, "step": 1030 }, { "epoch": 0.07, "learning_rate": 3.400915631131459e-06, "logits/chosen": -2.1254327297210693, "logits/rejected": -2.1563661098480225, "logps/chosen": -200.5404052734375, "logps/rejected": -237.29293823242188, "loss": 0.6895, "rewards/accuracies": 0.75, "rewards/chosen": -0.025679444894194603, "rewards/margins": 0.08101227134466171, "rewards/rejected": -0.10669170320034027, "step": 1040 }, { "epoch": 0.07, "learning_rate": 3.4336167429692615e-06, "logits/chosen": -2.3749256134033203, "logits/rejected": -2.1596624851226807, "logps/chosen": -216.2527618408203, "logps/rejected": -205.42910766601562, "loss": 0.6904, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.017023511230945587, "rewards/margins": 0.054301291704177856, "rewards/rejected": -0.07132480293512344, "step": 1050 }, { "epoch": 0.07, "learning_rate": 3.4663178548070635e-06, "logits/chosen": -2.1830973625183105, "logits/rejected": -2.239147424697876, "logps/chosen": -214.9247589111328, "logps/rejected": -203.87339782714844, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0002921056002378464, "rewards/margins": 0.053903043270111084, "rewards/rejected": -0.05361093208193779, "step": 1060 }, { "epoch": 0.07, "learning_rate": 3.499018966644866e-06, "logits/chosen": -2.3346736431121826, "logits/rejected": -2.139986038208008, "logps/chosen": -193.098388671875, "logps/rejected": -192.88113403320312, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.012733638286590576, "rewards/margins": 0.06595107167959213, "rewards/rejected": -0.0786847174167633, "step": 1070 }, { "epoch": 0.07, "learning_rate": 3.531720078482669e-06, "logits/chosen": -2.2687487602233887, "logits/rejected": -1.93032968044281, "logps/chosen": -225.96875, "logps/rejected": -222.5755615234375, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0009903141763061285, "rewards/margins": 0.07838054746389389, "rewards/rejected": -0.07937086373567581, "step": 1080 }, { "epoch": 0.07, "learning_rate": 3.5644211903204712e-06, "logits/chosen": -2.2722859382629395, "logits/rejected": -2.3190114498138428, "logps/chosen": -195.7224884033203, "logps/rejected": -204.1152801513672, "loss": 0.6904, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.031599871814250946, "rewards/margins": 0.05579303950071335, "rewards/rejected": -0.024193167686462402, "step": 1090 }, { "epoch": 0.07, "learning_rate": 3.5971223021582737e-06, "logits/chosen": -2.429405927658081, "logits/rejected": -2.290315866470337, "logps/chosen": -257.29278564453125, "logps/rejected": -203.24649047851562, "loss": 0.6922, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.014919871464371681, "rewards/margins": 0.030690353363752365, "rewards/rejected": -0.045610226690769196, "step": 1100 }, { "epoch": 0.07, "eval_logits/chosen": -2.3150722980499268, "eval_logits/rejected": -2.1294045448303223, "eval_logps/chosen": -232.15647888183594, "eval_logps/rejected": -219.40240478515625, "eval_loss": 0.691135585308075, "eval_rewards/accuracies": 0.6274999976158142, "eval_rewards/chosen": -0.0015151738189160824, "eval_rewards/margins": 0.07638993859291077, "eval_rewards/rejected": -0.07790511101484299, "eval_runtime": 706.9471, "eval_samples_per_second": 2.829, "eval_steps_per_second": 1.415, "step": 1100 }, { "epoch": 0.07, "learning_rate": 3.6298234139960765e-06, "logits/chosen": -2.3906145095825195, "logits/rejected": -2.036782741546631, "logps/chosen": -216.01156616210938, "logps/rejected": -173.99920654296875, "loss": 0.6892, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.017574917525053024, "rewards/margins": 0.07658959180116653, "rewards/rejected": -0.05901466682553291, "step": 1110 }, { "epoch": 0.07, "learning_rate": 3.6625245258338785e-06, "logits/chosen": -2.191586971282959, "logits/rejected": -2.110297679901123, "logps/chosen": -233.83700561523438, "logps/rejected": -320.0827331542969, "loss": 0.6854, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0494137704372406, "rewards/margins": 0.09785051643848419, "rewards/rejected": -0.04843674600124359, "step": 1120 }, { "epoch": 0.07, "learning_rate": 3.695225637671681e-06, "logits/chosen": -2.442321300506592, "logits/rejected": -2.241528272628784, "logps/chosen": -205.39053344726562, "logps/rejected": -178.85687255859375, "loss": 0.6897, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.018860872834920883, "rewards/margins": 0.06542352586984634, "rewards/rejected": -0.04656265676021576, "step": 1130 }, { "epoch": 0.07, "learning_rate": 3.7279267495094834e-06, "logits/chosen": -2.358170986175537, "logits/rejected": -2.220486879348755, "logps/chosen": -159.06790161132812, "logps/rejected": -177.91981506347656, "loss": 0.6895, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.013590176589787006, "rewards/margins": 0.08726686984300613, "rewards/rejected": -0.0736766904592514, "step": 1140 }, { "epoch": 0.08, "learning_rate": 3.7606278613472863e-06, "logits/chosen": -2.4245522022247314, "logits/rejected": -2.0788369178771973, "logps/chosen": -277.3457946777344, "logps/rejected": -205.87890625, "loss": 0.6928, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.04925479739904404, "rewards/margins": 0.061012279242277145, "rewards/rejected": -0.01175748836249113, "step": 1150 }, { "epoch": 0.08, "learning_rate": 3.7933289731850887e-06, "logits/chosen": -2.227184772491455, "logits/rejected": -2.006626605987549, "logps/chosen": -220.76419067382812, "logps/rejected": -200.02944946289062, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": 0.07796461135149002, "rewards/margins": 0.07523587346076965, "rewards/rejected": 0.002728732768446207, "step": 1160 }, { "epoch": 0.08, "learning_rate": 3.826030085022891e-06, "logits/chosen": -2.394782543182373, "logits/rejected": -2.1593916416168213, "logps/chosen": -244.3356475830078, "logps/rejected": -212.73583984375, "loss": 0.6916, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0331416018307209, "rewards/margins": 0.04552285000681877, "rewards/rejected": -0.012381250038743019, "step": 1170 }, { "epoch": 0.08, "learning_rate": 3.858731196860693e-06, "logits/chosen": -2.4333834648132324, "logits/rejected": -1.9844642877578735, "logps/chosen": -249.99893188476562, "logps/rejected": -210.4248504638672, "loss": 0.6925, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.008344946429133415, "rewards/margins": 0.026615191251039505, "rewards/rejected": -0.01827024295926094, "step": 1180 }, { "epoch": 0.08, "learning_rate": 3.891432308698496e-06, "logits/chosen": -2.120863437652588, "logits/rejected": -2.063352108001709, "logps/chosen": -193.93240356445312, "logps/rejected": -226.5436553955078, "loss": 0.6876, "rewards/accuracies": 0.625, "rewards/chosen": 0.01297982782125473, "rewards/margins": 0.07365990430116653, "rewards/rejected": -0.060680072754621506, "step": 1190 }, { "epoch": 0.08, "learning_rate": 3.924133420536299e-06, "logits/chosen": -2.2395856380462646, "logits/rejected": -2.041903018951416, "logps/chosen": -184.75523376464844, "logps/rejected": -157.53878784179688, "loss": 0.6906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.026105085387825966, "rewards/margins": 0.061862241476774216, "rewards/rejected": -0.08796733617782593, "step": 1200 }, { "epoch": 0.08, "eval_logits/chosen": -2.3272151947021484, "eval_logits/rejected": -2.1397786140441895, "eval_logps/chosen": -234.7645263671875, "eval_logps/rejected": -221.4020538330078, "eval_loss": 0.6907363533973694, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": -0.027595827355980873, "eval_rewards/margins": 0.07030569016933441, "eval_rewards/rejected": -0.09790151566267014, "eval_runtime": 706.2261, "eval_samples_per_second": 2.832, "eval_steps_per_second": 1.416, "step": 1200 }, { "epoch": 0.08, "learning_rate": 3.956834532374101e-06, "logits/chosen": -2.4392762184143066, "logits/rejected": -2.0458781719207764, "logps/chosen": -206.30532836914062, "logps/rejected": -173.46829223632812, "loss": 0.6875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.010426463559269905, "rewards/margins": 0.11114281415939331, "rewards/rejected": -0.12156929075717926, "step": 1210 }, { "epoch": 0.08, "learning_rate": 3.989535644211904e-06, "logits/chosen": -2.329332113265991, "logits/rejected": -2.0607223510742188, "logps/chosen": -216.85635375976562, "logps/rejected": -202.00527954101562, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.00270930677652359, "rewards/margins": 0.07769973576068878, "rewards/rejected": -0.07499042898416519, "step": 1220 }, { "epoch": 0.08, "learning_rate": 4.022236756049706e-06, "logits/chosen": -2.5103940963745117, "logits/rejected": -2.1222851276397705, "logps/chosen": -273.70758056640625, "logps/rejected": -246.398681640625, "loss": 0.691, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03429547697305679, "rewards/margins": 0.05166538804769516, "rewards/rejected": -0.017369914799928665, "step": 1230 }, { "epoch": 0.08, "learning_rate": 4.054937867887509e-06, "logits/chosen": -2.3860554695129395, "logits/rejected": -1.9378671646118164, "logps/chosen": -219.5018310546875, "logps/rejected": -214.9021759033203, "loss": 0.6907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04764958843588829, "rewards/margins": 0.06188775226473808, "rewards/rejected": -0.014238161034882069, "step": 1240 }, { "epoch": 0.08, "learning_rate": 4.087638979725311e-06, "logits/chosen": -2.3056764602661133, "logits/rejected": -2.371333599090576, "logps/chosen": -224.9440155029297, "logps/rejected": -230.5066375732422, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": 0.02895757555961609, "rewards/margins": 0.04107601195573807, "rewards/rejected": -0.01211843267083168, "step": 1250 }, { "epoch": 0.08, "learning_rate": 4.1203400915631135e-06, "logits/chosen": -2.221919059753418, "logits/rejected": -2.146714687347412, "logps/chosen": -237.0450439453125, "logps/rejected": -214.48135375976562, "loss": 0.6933, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.002859347965568304, "rewards/margins": 0.05717097595334053, "rewards/rejected": -0.05431162193417549, "step": 1260 }, { "epoch": 0.08, "learning_rate": 4.153041203400916e-06, "logits/chosen": -2.4315924644470215, "logits/rejected": -2.265227794647217, "logps/chosen": -259.8785705566406, "logps/rejected": -239.6594696044922, "loss": 0.693, "rewards/accuracies": 0.625, "rewards/chosen": 0.04543764516711235, "rewards/margins": 0.044155552983284, "rewards/rejected": 0.0012820929987356067, "step": 1270 }, { "epoch": 0.08, "learning_rate": 4.185742315238718e-06, "logits/chosen": -2.343573570251465, "logits/rejected": -2.0737881660461426, "logps/chosen": -194.74240112304688, "logps/rejected": -188.17759704589844, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": 0.03359198197722435, "rewards/margins": 0.05302148312330246, "rewards/rejected": -0.01942949742078781, "step": 1280 }, { "epoch": 0.08, "learning_rate": 4.218443427076521e-06, "logits/chosen": -2.289780616760254, "logits/rejected": -2.1402783393859863, "logps/chosen": -181.81228637695312, "logps/rejected": -185.71270751953125, "loss": 0.6926, "rewards/accuracies": 0.625, "rewards/chosen": 0.00946279987692833, "rewards/margins": 0.02429373934864998, "rewards/rejected": -0.014830941334366798, "step": 1290 }, { "epoch": 0.09, "learning_rate": 4.251144538914323e-06, "logits/chosen": -2.2517518997192383, "logits/rejected": -2.113873243331909, "logps/chosen": -218.7006378173828, "logps/rejected": -213.1343536376953, "loss": 0.6886, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.018896425142884254, "rewards/margins": 0.07629399746656418, "rewards/rejected": -0.05739758163690567, "step": 1300 }, { "epoch": 0.09, "eval_logits/chosen": -2.3501036167144775, "eval_logits/rejected": -2.161346673965454, "eval_logps/chosen": -230.54745483398438, "eval_logps/rejected": -215.89462280273438, "eval_loss": 0.6907321810722351, "eval_rewards/accuracies": 0.6104999780654907, "eval_rewards/chosen": 0.014575082808732986, "eval_rewards/margins": 0.05740221589803696, "eval_rewards/rejected": -0.04282712936401367, "eval_runtime": 708.7595, "eval_samples_per_second": 2.822, "eval_steps_per_second": 1.411, "step": 1300 }, { "epoch": 0.09, "learning_rate": 4.283845650752126e-06, "logits/chosen": -2.379955291748047, "logits/rejected": -2.1668858528137207, "logps/chosen": -285.83856201171875, "logps/rejected": -227.052490234375, "loss": 0.691, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.003734259633347392, "rewards/margins": 0.05405404418706894, "rewards/rejected": -0.05031978338956833, "step": 1310 }, { "epoch": 0.09, "learning_rate": 4.316546762589928e-06, "logits/chosen": -2.3057024478912354, "logits/rejected": -2.2258567810058594, "logps/chosen": -207.4925537109375, "logps/rejected": -178.53604125976562, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004349817987531424, "rewards/margins": 0.05341663956642151, "rewards/rejected": -0.04906681925058365, "step": 1320 }, { "epoch": 0.09, "learning_rate": 4.349247874427731e-06, "logits/chosen": -2.3624565601348877, "logits/rejected": -2.0772993564605713, "logps/chosen": -228.140869140625, "logps/rejected": -275.2492980957031, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.01606837287545204, "rewards/margins": 0.07025826722383499, "rewards/rejected": -0.08632663637399673, "step": 1330 }, { "epoch": 0.09, "learning_rate": 4.381948986265534e-06, "logits/chosen": -2.505383253097534, "logits/rejected": -2.2410616874694824, "logps/chosen": -262.94122314453125, "logps/rejected": -259.1203918457031, "loss": 0.6914, "rewards/accuracies": 0.625, "rewards/chosen": -0.0009654685854911804, "rewards/margins": 0.09186152368783951, "rewards/rejected": -0.0928269773721695, "step": 1340 }, { "epoch": 0.09, "learning_rate": 4.414650098103336e-06, "logits/chosen": -2.652198314666748, "logits/rejected": -2.3813252449035645, "logps/chosen": -260.88726806640625, "logps/rejected": -242.92202758789062, "loss": 0.6909, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.01065239030867815, "rewards/margins": 0.04458843916654587, "rewards/rejected": -0.03393604978919029, "step": 1350 }, { "epoch": 0.09, "learning_rate": 4.447351209941138e-06, "logits/chosen": -2.3565680980682373, "logits/rejected": -2.0831761360168457, "logps/chosen": -215.17153930664062, "logps/rejected": -202.86697387695312, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05506874993443489, "rewards/margins": 0.05836993455886841, "rewards/rejected": -0.1134386882185936, "step": 1360 }, { "epoch": 0.09, "learning_rate": 4.480052321778941e-06, "logits/chosen": -2.305148124694824, "logits/rejected": -2.3135273456573486, "logps/chosen": -226.86849975585938, "logps/rejected": -212.960205078125, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0004178326635155827, "rewards/margins": 0.06787695735692978, "rewards/rejected": -0.06745912879705429, "step": 1370 }, { "epoch": 0.09, "learning_rate": 4.5127534336167435e-06, "logits/chosen": -2.3873705863952637, "logits/rejected": -2.1096668243408203, "logps/chosen": -252.4434814453125, "logps/rejected": -214.6434783935547, "loss": 0.6916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.052694983780384064, "rewards/margins": 0.058600615710020065, "rewards/rejected": -0.11129560321569443, "step": 1380 }, { "epoch": 0.09, "learning_rate": 4.5454545454545455e-06, "logits/chosen": -2.356358766555786, "logits/rejected": -2.299755096435547, "logps/chosen": -168.05401611328125, "logps/rejected": -167.7144775390625, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.014046875759959221, "rewards/margins": 0.0986950695514679, "rewards/rejected": -0.08464818447828293, "step": 1390 }, { "epoch": 0.09, "learning_rate": 4.578155657292348e-06, "logits/chosen": -2.326817274093628, "logits/rejected": -2.2857398986816406, "logps/chosen": -253.63827514648438, "logps/rejected": -270.8973083496094, "loss": 0.6887, "rewards/accuracies": 0.625, "rewards/chosen": 0.017673691734671593, "rewards/margins": 0.08337118476629257, "rewards/rejected": -0.06569750607013702, "step": 1400 }, { "epoch": 0.09, "eval_logits/chosen": -2.3205173015594482, "eval_logits/rejected": -2.1350162029266357, "eval_logps/chosen": -231.281494140625, "eval_logps/rejected": -217.48509216308594, "eval_loss": 0.69090336561203, "eval_rewards/accuracies": 0.6129999756813049, "eval_rewards/chosen": 0.007234419696033001, "eval_rewards/margins": 0.06596639752388, "eval_rewards/rejected": -0.058731988072395325, "eval_runtime": 710.954, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 1400 }, { "epoch": 0.09, "learning_rate": 4.610856769130151e-06, "logits/chosen": -2.3867039680480957, "logits/rejected": -2.243751049041748, "logps/chosen": -238.0634002685547, "logps/rejected": -215.328857421875, "loss": 0.6921, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.010808589868247509, "rewards/margins": 0.06767071038484573, "rewards/rejected": -0.05686211585998535, "step": 1410 }, { "epoch": 0.09, "learning_rate": 4.643557880967953e-06, "logits/chosen": -2.3865418434143066, "logits/rejected": -2.1880135536193848, "logps/chosen": -200.36758422851562, "logps/rejected": -201.47171020507812, "loss": 0.6911, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.015687663108110428, "rewards/margins": 0.03603903204202652, "rewards/rejected": -0.05172669142484665, "step": 1420 }, { "epoch": 0.09, "learning_rate": 4.676258992805755e-06, "logits/chosen": -2.345919609069824, "logits/rejected": -2.072216510772705, "logps/chosen": -267.54669189453125, "logps/rejected": -233.08798217773438, "loss": 0.69, "rewards/accuracies": 0.75, "rewards/chosen": 0.02073250710964203, "rewards/margins": 0.07689642906188965, "rewards/rejected": -0.05616391822695732, "step": 1430 }, { "epoch": 0.09, "learning_rate": 4.708960104643558e-06, "logits/chosen": -2.3562963008880615, "logits/rejected": -2.3137621879577637, "logps/chosen": -306.24517822265625, "logps/rejected": -274.78741455078125, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.00035466160625219345, "rewards/margins": 0.05302319675683975, "rewards/rejected": -0.05337785556912422, "step": 1440 }, { "epoch": 0.09, "learning_rate": 4.741661216481361e-06, "logits/chosen": -2.3189618587493896, "logits/rejected": -2.2773196697235107, "logps/chosen": -237.4014129638672, "logps/rejected": -258.08966064453125, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.010558743961155415, "rewards/margins": 0.09441892802715302, "rewards/rejected": -0.10497768223285675, "step": 1450 }, { "epoch": 0.1, "learning_rate": 4.774362328319163e-06, "logits/chosen": -2.298546552658081, "logits/rejected": -2.144091844558716, "logps/chosen": -217.25430297851562, "logps/rejected": -191.17282104492188, "loss": 0.6933, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.014642052352428436, "rewards/margins": 0.038679804652929306, "rewards/rejected": -0.05332186073064804, "step": 1460 }, { "epoch": 0.1, "learning_rate": 4.807063440156966e-06, "logits/chosen": -2.33768367767334, "logits/rejected": -2.1054131984710693, "logps/chosen": -260.9105529785156, "logps/rejected": -208.8505401611328, "loss": 0.6884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02398931235074997, "rewards/margins": 0.09310416132211685, "rewards/rejected": -0.06911484152078629, "step": 1470 }, { "epoch": 0.1, "learning_rate": 4.839764551994769e-06, "logits/chosen": -2.4007508754730225, "logits/rejected": -2.095411539077759, "logps/chosen": -246.15673828125, "logps/rejected": -215.8951873779297, "loss": 0.6919, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.025517677888274193, "rewards/margins": 0.07472027838230133, "rewards/rejected": -0.04920259863138199, "step": 1480 }, { "epoch": 0.1, "learning_rate": 4.872465663832571e-06, "logits/chosen": -2.2471413612365723, "logits/rejected": -2.1273739337921143, "logps/chosen": -207.810302734375, "logps/rejected": -194.42510986328125, "loss": 0.6898, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0005180038278922439, "rewards/margins": 0.07074950635433197, "rewards/rejected": -0.0712675154209137, "step": 1490 }, { "epoch": 0.1, "learning_rate": 4.905166775670373e-06, "logits/chosen": -2.372222661972046, "logits/rejected": -2.034890651702881, "logps/chosen": -220.8203582763672, "logps/rejected": -200.08526611328125, "loss": 0.6887, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.00939613301306963, "rewards/margins": 0.06658481061458588, "rewards/rejected": -0.057188671082258224, "step": 1500 }, { "epoch": 0.1, "eval_logits/chosen": -2.321709394454956, "eval_logits/rejected": -2.1364989280700684, "eval_logps/chosen": -233.14047241210938, "eval_logps/rejected": -220.0596923828125, "eval_loss": 0.6906663179397583, "eval_rewards/accuracies": 0.6305000185966492, "eval_rewards/chosen": -0.011355271562933922, "eval_rewards/margins": 0.07312270253896713, "eval_rewards/rejected": -0.0844779685139656, "eval_runtime": 714.039, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.9378678875081756e-06, "logits/chosen": -2.4643445014953613, "logits/rejected": -2.1634469032287598, "logps/chosen": -214.7722625732422, "logps/rejected": -191.8986358642578, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.000286552298348397, "rewards/margins": 0.08134503662586212, "rewards/rejected": -0.081631600856781, "step": 1510 }, { "epoch": 0.1, "learning_rate": 4.9705689993459784e-06, "logits/chosen": -2.4508216381073, "logits/rejected": -2.0175795555114746, "logps/chosen": -207.11288452148438, "logps/rejected": -161.23849487304688, "loss": 0.6888, "rewards/accuracies": 0.625, "rewards/chosen": -0.021875491365790367, "rewards/margins": 0.11838686466217041, "rewards/rejected": -0.14026235044002533, "step": 1520 }, { "epoch": 0.1, "learning_rate": 4.999999934793849e-06, "logits/chosen": -2.350825309753418, "logits/rejected": -2.257450580596924, "logps/chosen": -248.128173828125, "logps/rejected": -223.34884643554688, "loss": 0.6916, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.0017488065641373396, "rewards/margins": 0.04637077450752258, "rewards/rejected": -0.04811957851052284, "step": 1530 }, { "epoch": 0.1, "learning_rate": 4.999992110059814e-06, "logits/chosen": -2.321943759918213, "logits/rejected": -2.3055014610290527, "logps/chosen": -277.02490234375, "logps/rejected": -263.4502258300781, "loss": 0.6915, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.00662571843713522, "rewards/margins": 0.06538694351911545, "rewards/rejected": -0.058761220425367355, "step": 1540 }, { "epoch": 0.1, "learning_rate": 4.999971244142299e-06, "logits/chosen": -2.4726457595825195, "logits/rejected": -2.1803183555603027, "logps/chosen": -269.8371276855469, "logps/rejected": -237.511962890625, "loss": 0.6918, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03615967929363251, "rewards/margins": 0.06921084225177765, "rewards/rejected": -0.03305116295814514, "step": 1550 }, { "epoch": 0.1, "learning_rate": 4.999937337150149e-06, "logits/chosen": -2.155517578125, "logits/rejected": -2.115971803665161, "logps/chosen": -232.05459594726562, "logps/rejected": -219.97708129882812, "loss": 0.6911, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.024602752178907394, "rewards/margins": 0.03303222730755806, "rewards/rejected": -0.008429473266005516, "step": 1560 }, { "epoch": 0.1, "learning_rate": 4.99989038926024e-06, "logits/chosen": -2.11495304107666, "logits/rejected": -2.245858669281006, "logps/chosen": -199.39358520507812, "logps/rejected": -206.3715057373047, "loss": 0.6895, "rewards/accuracies": 0.5, "rewards/chosen": -0.009655221365392208, "rewards/margins": 0.047758929431438446, "rewards/rejected": -0.05741415172815323, "step": 1570 }, { "epoch": 0.1, "learning_rate": 4.999830400717476e-06, "logits/chosen": -2.3075220584869385, "logits/rejected": -2.172712802886963, "logps/chosen": -287.1651306152344, "logps/rejected": -280.3276672363281, "loss": 0.6904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0022194921039044857, "rewards/margins": 0.0943801999092102, "rewards/rejected": -0.09216071665287018, "step": 1580 }, { "epoch": 0.1, "learning_rate": 4.999757371834787e-06, "logits/chosen": -2.1875932216644287, "logits/rejected": -2.094125747680664, "logps/chosen": -241.1585693359375, "logps/rejected": -238.26773071289062, "loss": 0.6885, "rewards/accuracies": 0.75, "rewards/chosen": -0.013396786525845528, "rewards/margins": 0.126246377825737, "rewards/rejected": -0.13964316248893738, "step": 1590 }, { "epoch": 0.1, "learning_rate": 4.999671302993125e-06, "logits/chosen": -2.134300470352173, "logits/rejected": -2.071805477142334, "logps/chosen": -248.94729614257812, "logps/rejected": -272.7432861328125, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.003668667282909155, "rewards/margins": 0.08407886326313019, "rewards/rejected": -0.08774752914905548, "step": 1600 }, { "epoch": 0.1, "eval_logits/chosen": -2.327033042907715, "eval_logits/rejected": -2.141350269317627, "eval_logps/chosen": -227.57763671875, "eval_logps/rejected": -214.50515747070312, "eval_loss": 0.6906238794326782, "eval_rewards/accuracies": 0.6259999871253967, "eval_rewards/chosen": 0.04427312687039375, "eval_rewards/margins": 0.07320577651262283, "eval_rewards/rejected": -0.028932644054293633, "eval_runtime": 706.6876, "eval_samples_per_second": 2.83, "eval_steps_per_second": 1.415, "step": 1600 }, { "epoch": 0.11, "learning_rate": 4.999572194641471e-06, "logits/chosen": -2.291485071182251, "logits/rejected": -2.1361746788024902, "logps/chosen": -271.530029296875, "logps/rejected": -228.67013549804688, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03988269716501236, "rewards/margins": 0.09610097855329514, "rewards/rejected": -0.056218285113573074, "step": 1610 }, { "epoch": 0.11, "learning_rate": 4.999460047296819e-06, "logits/chosen": -2.2640249729156494, "logits/rejected": -2.134519577026367, "logps/chosen": -213.858154296875, "logps/rejected": -200.5656280517578, "loss": 0.6906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01746007241308689, "rewards/margins": 0.07616613805294037, "rewards/rejected": -0.05870606750249863, "step": 1620 }, { "epoch": 0.11, "learning_rate": 4.999334861544186e-06, "logits/chosen": -2.381791353225708, "logits/rejected": -2.045926570892334, "logps/chosen": -226.28970336914062, "logps/rejected": -178.6829833984375, "loss": 0.6912, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.031635530292987823, "rewards/margins": 0.0839659571647644, "rewards/rejected": -0.05233042314648628, "step": 1630 }, { "epoch": 0.11, "learning_rate": 4.999196638036604e-06, "logits/chosen": -2.4527511596679688, "logits/rejected": -2.2551088333129883, "logps/chosen": -281.4839782714844, "logps/rejected": -249.938232421875, "loss": 0.6927, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.05931050330400467, "rewards/margins": 0.03690633550286293, "rewards/rejected": 0.02240416780114174, "step": 1640 }, { "epoch": 0.11, "learning_rate": 4.999045377495111e-06, "logits/chosen": -2.117143154144287, "logits/rejected": -2.3743062019348145, "logps/chosen": -170.4903106689453, "logps/rejected": -265.4920654296875, "loss": 0.6897, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03844357654452324, "rewards/margins": 0.07219245284795761, "rewards/rejected": -0.033748872578144073, "step": 1650 }, { "epoch": 0.11, "learning_rate": 4.998881080708759e-06, "logits/chosen": -2.2666547298431396, "logits/rejected": -2.1885650157928467, "logps/chosen": -239.72262573242188, "logps/rejected": -212.6611328125, "loss": 0.6933, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00598920788615942, "rewards/margins": 0.03883373737335205, "rewards/rejected": -0.03284453600645065, "step": 1660 }, { "epoch": 0.11, "learning_rate": 4.998703748534599e-06, "logits/chosen": -2.0997233390808105, "logits/rejected": -1.8399873971939087, "logps/chosen": -240.05831909179688, "logps/rejected": -196.15187072753906, "loss": 0.6917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.005714725703001022, "rewards/margins": 0.06784109026193619, "rewards/rejected": -0.062126368284225464, "step": 1670 }, { "epoch": 0.11, "learning_rate": 4.998513381897683e-06, "logits/chosen": -2.338465452194214, "logits/rejected": -2.107517719268799, "logps/chosen": -237.6542205810547, "logps/rejected": -185.0458984375, "loss": 0.6911, "rewards/accuracies": 0.5, "rewards/chosen": 0.015895305201411247, "rewards/margins": 0.04385993629693985, "rewards/rejected": -0.027964631095528603, "step": 1680 }, { "epoch": 0.11, "learning_rate": 4.9983099817910565e-06, "logits/chosen": -2.259268283843994, "logits/rejected": -2.0765738487243652, "logps/chosen": -243.1163330078125, "logps/rejected": -246.34951782226562, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02145574428141117, "rewards/margins": 0.07711198180913925, "rewards/rejected": -0.05565624311566353, "step": 1690 }, { "epoch": 0.11, "learning_rate": 4.998093549275754e-06, "logits/chosen": -2.2384533882141113, "logits/rejected": -2.2348380088806152, "logps/chosen": -258.17047119140625, "logps/rejected": -287.0838317871094, "loss": 0.6893, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.02163517475128174, "rewards/margins": 0.07120385020971298, "rewards/rejected": -0.049568675458431244, "step": 1700 }, { "epoch": 0.11, "eval_logits/chosen": -2.3420772552490234, "eval_logits/rejected": -2.1548047065734863, "eval_logps/chosen": -228.6732940673828, "eval_logps/rejected": -215.70216369628906, "eval_loss": 0.6903870701789856, "eval_rewards/accuracies": 0.6215000152587891, "eval_rewards/chosen": 0.033316612243652344, "eval_rewards/margins": 0.07421907782554626, "eval_rewards/rejected": -0.04090247303247452, "eval_runtime": 706.8309, "eval_samples_per_second": 2.83, "eval_steps_per_second": 1.415, "step": 1700 }, { "epoch": 0.11, "learning_rate": 4.997864085480794e-06, "logits/chosen": -2.372429370880127, "logits/rejected": -2.201474189758301, "logps/chosen": -263.83319091796875, "logps/rejected": -253.3795166015625, "loss": 0.6936, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.025129973888397217, "rewards/margins": 0.08083225041627884, "rewards/rejected": -0.05570227652788162, "step": 1710 }, { "epoch": 0.11, "learning_rate": 4.997621591603171e-06, "logits/chosen": -2.3386642932891846, "logits/rejected": -2.1461567878723145, "logps/chosen": -160.34231567382812, "logps/rejected": -167.763916015625, "loss": 0.6917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.023095201700925827, "rewards/margins": 0.08106372505426407, "rewards/rejected": -0.1041589230298996, "step": 1720 }, { "epoch": 0.11, "learning_rate": 4.997366068907853e-06, "logits/chosen": -2.3146162033081055, "logits/rejected": -2.260533094406128, "logps/chosen": -257.2478942871094, "logps/rejected": -243.188232421875, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.011618571355938911, "rewards/margins": 0.05815444514155388, "rewards/rejected": -0.04653587192296982, "step": 1730 }, { "epoch": 0.11, "learning_rate": 4.997097518727771e-06, "logits/chosen": -2.366065263748169, "logits/rejected": -2.1315550804138184, "logps/chosen": -224.10079956054688, "logps/rejected": -197.63809204101562, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.011443842202425003, "rewards/margins": 0.06659694015979767, "rewards/rejected": -0.07804077863693237, "step": 1740 }, { "epoch": 0.11, "learning_rate": 4.9968159424638155e-06, "logits/chosen": -2.2000982761383057, "logits/rejected": -2.418351650238037, "logps/chosen": -218.63314819335938, "logps/rejected": -273.26837158203125, "loss": 0.691, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.010267667472362518, "rewards/margins": 0.02790077030658722, "rewards/rejected": -0.03816843777894974, "step": 1750 }, { "epoch": 0.12, "learning_rate": 4.9965213415848235e-06, "logits/chosen": -2.284058094024658, "logits/rejected": -1.9068619012832642, "logps/chosen": -231.62319946289062, "logps/rejected": -201.48184204101562, "loss": 0.6907, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03218645602464676, "rewards/margins": 0.06718595325946808, "rewards/rejected": -0.09937240928411484, "step": 1760 }, { "epoch": 0.12, "learning_rate": 4.9962137176275805e-06, "logits/chosen": -2.3636927604675293, "logits/rejected": -2.164499044418335, "logps/chosen": -232.9027862548828, "logps/rejected": -236.94198608398438, "loss": 0.6934, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.013926369138062, "rewards/margins": 0.04597216844558716, "rewards/rejected": -0.03204580023884773, "step": 1770 }, { "epoch": 0.12, "learning_rate": 4.9958930721968015e-06, "logits/chosen": -2.2207086086273193, "logits/rejected": -2.3023934364318848, "logps/chosen": -210.31875610351562, "logps/rejected": -226.93685913085938, "loss": 0.692, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.017164241522550583, "rewards/margins": 0.0511639229953289, "rewards/rejected": -0.03399968519806862, "step": 1780 }, { "epoch": 0.12, "learning_rate": 4.995559406965132e-06, "logits/chosen": -2.4438416957855225, "logits/rejected": -2.0871827602386475, "logps/chosen": -229.1040802001953, "logps/rejected": -204.32778930664062, "loss": 0.6912, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.02811221405863762, "rewards/margins": 0.04839346557855606, "rewards/rejected": -0.02028125710785389, "step": 1790 }, { "epoch": 0.12, "learning_rate": 4.995212723673131e-06, "logits/chosen": -2.408142566680908, "logits/rejected": -2.172886848449707, "logps/chosen": -225.9892578125, "logps/rejected": -183.36489868164062, "loss": 0.6904, "rewards/accuracies": 0.75, "rewards/chosen": 0.04213835671544075, "rewards/margins": 0.07358547300100327, "rewards/rejected": -0.031447120010852814, "step": 1800 }, { "epoch": 0.12, "eval_logits/chosen": -2.3330607414245605, "eval_logits/rejected": -2.146653652191162, "eval_logps/chosen": -227.91104125976562, "eval_logps/rejected": -213.03692626953125, "eval_loss": 0.6908692717552185, "eval_rewards/accuracies": 0.6159999966621399, "eval_rewards/chosen": 0.040939364582300186, "eval_rewards/margins": 0.055189553648233414, "eval_rewards/rejected": -0.014250185340642929, "eval_runtime": 706.2668, "eval_samples_per_second": 2.832, "eval_steps_per_second": 1.416, "step": 1800 }, { "epoch": 0.12, "learning_rate": 4.99485302412927e-06, "logits/chosen": -2.090059280395508, "logits/rejected": -2.0199811458587646, "logps/chosen": -204.09255981445312, "logps/rejected": -211.0037078857422, "loss": 0.6897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.039184752851724625, "rewards/margins": 0.06218431517481804, "rewards/rejected": -0.022999566048383713, "step": 1810 }, { "epoch": 0.12, "learning_rate": 4.994480310209918e-06, "logits/chosen": -2.297668933868408, "logits/rejected": -2.459190845489502, "logps/chosen": -235.6026611328125, "logps/rejected": -254.4353485107422, "loss": 0.6912, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0638035237789154, "rewards/margins": 0.05078417807817459, "rewards/rejected": 0.013019341044127941, "step": 1820 }, { "epoch": 0.12, "learning_rate": 4.994094583859332e-06, "logits/chosen": -2.326498031616211, "logits/rejected": -2.0701937675476074, "logps/chosen": -157.9686737060547, "logps/rejected": -194.639892578125, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.069440558552742, "rewards/margins": 0.0464170016348362, "rewards/rejected": 0.023023560643196106, "step": 1830 }, { "epoch": 0.12, "learning_rate": 4.9936958470896525e-06, "logits/chosen": -2.334303617477417, "logits/rejected": -2.089700698852539, "logps/chosen": -221.90780639648438, "logps/rejected": -187.50491333007812, "loss": 0.6886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.018373683094978333, "rewards/margins": 0.08500251919031143, "rewards/rejected": -0.0666288360953331, "step": 1840 }, { "epoch": 0.12, "learning_rate": 4.993284101980883e-06, "logits/chosen": -2.2909703254699707, "logits/rejected": -2.0997474193573, "logps/chosen": -244.8101348876953, "logps/rejected": -217.73196411132812, "loss": 0.6859, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.023147549480199814, "rewards/margins": 0.12554757297039032, "rewards/rejected": -0.14869512617588043, "step": 1850 }, { "epoch": 0.12, "learning_rate": 4.9928593506808885e-06, "logits/chosen": -2.40543794631958, "logits/rejected": -2.2108352184295654, "logps/chosen": -258.2170715332031, "logps/rejected": -228.532958984375, "loss": 0.6916, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.014026492834091187, "rewards/margins": 0.07293161749839783, "rewards/rejected": -0.08695811778306961, "step": 1860 }, { "epoch": 0.12, "learning_rate": 4.992421595405381e-06, "logits/chosen": -2.3434250354766846, "logits/rejected": -2.095716714859009, "logps/chosen": -224.97021484375, "logps/rejected": -158.5146942138672, "loss": 0.6933, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.03222180902957916, "rewards/margins": 0.04391375184059143, "rewards/rejected": -0.011691942811012268, "step": 1870 }, { "epoch": 0.12, "learning_rate": 4.991970838437905e-06, "logits/chosen": -2.2580971717834473, "logits/rejected": -2.1631343364715576, "logps/chosen": -220.3092803955078, "logps/rejected": -255.4618377685547, "loss": 0.6898, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0502026192843914, "rewards/margins": 0.07047822326421738, "rewards/rejected": -0.020275603979825974, "step": 1880 }, { "epoch": 0.12, "learning_rate": 4.9915070821298294e-06, "logits/chosen": -2.366753339767456, "logits/rejected": -2.039536237716675, "logps/chosen": -162.96090698242188, "logps/rejected": -157.37863159179688, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.04008902981877327, "rewards/margins": 0.03506173565983772, "rewards/rejected": 0.0050272950902581215, "step": 1890 }, { "epoch": 0.12, "learning_rate": 4.991030328900336e-06, "logits/chosen": -2.295825481414795, "logits/rejected": -2.040485143661499, "logps/chosen": -278.2053527832031, "logps/rejected": -216.57290649414062, "loss": 0.6908, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.06363843381404877, "rewards/margins": 0.08403732627630234, "rewards/rejected": -0.02039889432489872, "step": 1900 }, { "epoch": 0.12, "eval_logits/chosen": -2.3460586071014404, "eval_logits/rejected": -2.158717393875122, "eval_logps/chosen": -227.45773315429688, "eval_logps/rejected": -213.3264617919922, "eval_loss": 0.6906041502952576, "eval_rewards/accuracies": 0.6290000081062317, "eval_rewards/chosen": 0.04547214135527611, "eval_rewards/margins": 0.06261760741472244, "eval_rewards/rejected": -0.017145469784736633, "eval_runtime": 707.9289, "eval_samples_per_second": 2.825, "eval_steps_per_second": 1.413, "step": 1900 }, { "epoch": 0.12, "learning_rate": 4.9905405812364014e-06, "logits/chosen": -2.3040223121643066, "logits/rejected": -2.288963794708252, "logps/chosen": -196.22824096679688, "logps/rejected": -204.9025115966797, "loss": 0.6915, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.06067962199449539, "rewards/margins": 0.062089789658784866, "rewards/rejected": -0.0014101641718298197, "step": 1910 }, { "epoch": 0.13, "learning_rate": 4.990037841692791e-06, "logits/chosen": -2.2711470127105713, "logits/rejected": -2.027571201324463, "logps/chosen": -203.07684326171875, "logps/rejected": -167.22711181640625, "loss": 0.6901, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.02964043617248535, "rewards/margins": 0.0743485689163208, "rewards/rejected": -0.04470812529325485, "step": 1920 }, { "epoch": 0.13, "learning_rate": 4.989522112892039e-06, "logits/chosen": -2.3281807899475098, "logits/rejected": -2.2966067790985107, "logps/chosen": -198.56414794921875, "logps/rejected": -204.41529846191406, "loss": 0.6901, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.009347607381641865, "rewards/margins": 0.06741191446781158, "rewards/rejected": -0.07675951719284058, "step": 1930 }, { "epoch": 0.13, "learning_rate": 4.98899339752444e-06, "logits/chosen": -2.370422124862671, "logits/rejected": -2.1254730224609375, "logps/chosen": -227.0062255859375, "logps/rejected": -208.43771362304688, "loss": 0.688, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.025960898026823997, "rewards/margins": 0.10374845564365387, "rewards/rejected": -0.07778755575418472, "step": 1940 }, { "epoch": 0.13, "learning_rate": 4.988451698348033e-06, "logits/chosen": -2.287224769592285, "logits/rejected": -2.2654972076416016, "logps/chosen": -177.00662231445312, "logps/rejected": -202.26292419433594, "loss": 0.6936, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.013209563679993153, "rewards/margins": 0.04986957833170891, "rewards/rejected": -0.03666001558303833, "step": 1950 }, { "epoch": 0.13, "learning_rate": 4.987897018188585e-06, "logits/chosen": -2.263166904449463, "logits/rejected": -2.024923801422119, "logps/chosen": -222.4883270263672, "logps/rejected": -171.92982482910156, "loss": 0.6919, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0013100657379254699, "rewards/margins": 0.061175812035799026, "rewards/rejected": -0.05986575409770012, "step": 1960 }, { "epoch": 0.13, "learning_rate": 4.9873293599395814e-06, "logits/chosen": -2.2896173000335693, "logits/rejected": -2.1597745418548584, "logps/chosen": -195.554931640625, "logps/rejected": -200.31951904296875, "loss": 0.6876, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02123176120221615, "rewards/margins": 0.08748042583465576, "rewards/rejected": -0.10871219635009766, "step": 1970 }, { "epoch": 0.13, "learning_rate": 4.986748726562203e-06, "logits/chosen": -2.3323073387145996, "logits/rejected": -2.2020726203918457, "logps/chosen": -216.76565551757812, "logps/rejected": -197.0693359375, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": -0.009111806750297546, "rewards/margins": 0.05196034908294678, "rewards/rejected": -0.061072152107954025, "step": 1980 }, { "epoch": 0.13, "learning_rate": 4.98615512108532e-06, "logits/chosen": -2.42622709274292, "logits/rejected": -2.2754573822021484, "logps/chosen": -218.6569061279297, "logps/rejected": -224.9327850341797, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.028538372367620468, "rewards/margins": 0.044925060123205185, "rewards/rejected": -0.016386687755584717, "step": 1990 }, { "epoch": 0.13, "learning_rate": 4.985548546605469e-06, "logits/chosen": -2.1802406311035156, "logits/rejected": -2.314124584197998, "logps/chosen": -215.97323608398438, "logps/rejected": -237.9573974609375, "loss": 0.6907, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.028674548491835594, "rewards/margins": 0.04343647509813309, "rewards/rejected": -0.07211102545261383, "step": 2000 }, { "epoch": 0.13, "eval_logits/chosen": -2.355757713317871, "eval_logits/rejected": -2.16721248626709, "eval_logps/chosen": -232.93431091308594, "eval_logps/rejected": -220.59490966796875, "eval_loss": 0.6904016137123108, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.009293550625443459, "eval_rewards/margins": 0.08053648471832275, "eval_rewards/rejected": -0.08983004093170166, "eval_runtime": 709.7269, "eval_samples_per_second": 2.818, "eval_steps_per_second": 1.409, "step": 2000 }, { "epoch": 0.13, "learning_rate": 4.984929006286838e-06, "logits/chosen": -2.2016310691833496, "logits/rejected": -2.152900457382202, "logps/chosen": -211.8243865966797, "logps/rejected": -217.55712890625, "loss": 0.6935, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.025103915482759476, "rewards/margins": 0.022764649242162704, "rewards/rejected": -0.04786856472492218, "step": 2010 }, { "epoch": 0.13, "learning_rate": 4.984296503361256e-06, "logits/chosen": -2.4327239990234375, "logits/rejected": -2.072183132171631, "logps/chosen": -202.29269409179688, "logps/rejected": -166.57730102539062, "loss": 0.6914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0037281368859112263, "rewards/margins": 0.06288442015647888, "rewards/rejected": -0.06661255657672882, "step": 2020 }, { "epoch": 0.13, "learning_rate": 4.9836510411281645e-06, "logits/chosen": -2.2853875160217285, "logits/rejected": -2.1659188270568848, "logps/chosen": -278.9981384277344, "logps/rejected": -251.56393432617188, "loss": 0.6867, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.004663098603487015, "rewards/margins": 0.1196819394826889, "rewards/rejected": -0.12434504926204681, "step": 2030 }, { "epoch": 0.13, "learning_rate": 4.982992622954613e-06, "logits/chosen": -2.422229766845703, "logits/rejected": -2.1254818439483643, "logps/chosen": -282.1938171386719, "logps/rejected": -176.20748901367188, "loss": 0.6885, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.013343462720513344, "rewards/margins": 0.08182214200496674, "rewards/rejected": -0.06847867369651794, "step": 2040 }, { "epoch": 0.13, "learning_rate": 4.9823212522752325e-06, "logits/chosen": -2.530290126800537, "logits/rejected": -2.2488186359405518, "logps/chosen": -275.6991882324219, "logps/rejected": -252.4962615966797, "loss": 0.6881, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.02114512026309967, "rewards/margins": 0.10833090543746948, "rewards/rejected": -0.08718578517436981, "step": 2050 }, { "epoch": 0.13, "learning_rate": 4.981636932592222e-06, "logits/chosen": -2.287421703338623, "logits/rejected": -2.181699275970459, "logps/chosen": -207.735107421875, "logps/rejected": -210.69009399414062, "loss": 0.6916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.05177903175354004, "rewards/margins": 0.0697595402598381, "rewards/rejected": -0.017980504781007767, "step": 2060 }, { "epoch": 0.14, "learning_rate": 4.980939667475328e-06, "logits/chosen": -2.505133628845215, "logits/rejected": -2.146200656890869, "logps/chosen": -268.6704406738281, "logps/rejected": -213.940673828125, "loss": 0.6912, "rewards/accuracies": 0.625, "rewards/chosen": 0.05504737049341202, "rewards/margins": 0.0586230531334877, "rewards/rejected": -0.0035756707657128572, "step": 2070 }, { "epoch": 0.14, "learning_rate": 4.980229460561826e-06, "logits/chosen": -2.353895664215088, "logits/rejected": -2.2923059463500977, "logps/chosen": -214.421630859375, "logps/rejected": -210.04910278320312, "loss": 0.6896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.03659746050834656, "rewards/margins": 0.10933919250965118, "rewards/rejected": -0.07274172455072403, "step": 2080 }, { "epoch": 0.14, "learning_rate": 4.979506315556503e-06, "logits/chosen": -2.316563129425049, "logits/rejected": -1.9798656702041626, "logps/chosen": -284.0970153808594, "logps/rejected": -241.884521484375, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.023951154202222824, "rewards/margins": 0.08775506168603897, "rewards/rejected": -0.06380391120910645, "step": 2090 }, { "epoch": 0.14, "learning_rate": 4.9787702362316395e-06, "logits/chosen": -2.36979079246521, "logits/rejected": -2.592101812362671, "logps/chosen": -188.11888122558594, "logps/rejected": -218.1317901611328, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.027083251625299454, "rewards/margins": 0.04498888552188873, "rewards/rejected": -0.01790563203394413, "step": 2100 }, { "epoch": 0.14, "eval_logits/chosen": -2.3738138675689697, "eval_logits/rejected": -2.1836633682250977, "eval_logps/chosen": -229.5577850341797, "eval_logps/rejected": -215.9217529296875, "eval_loss": 0.6904562711715698, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": 0.024471644312143326, "eval_rewards/margins": 0.06757022440433502, "eval_rewards/rejected": -0.04309859126806259, "eval_runtime": 710.6569, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 2100 }, { "epoch": 0.14, "learning_rate": 4.9780212264269835e-06, "logits/chosen": -2.3147263526916504, "logits/rejected": -2.065084934234619, "logps/chosen": -182.92022705078125, "logps/rejected": -169.31704711914062, "loss": 0.6912, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.01898839697241783, "rewards/margins": 0.03532510995864868, "rewards/rejected": -0.01633671671152115, "step": 2110 }, { "epoch": 0.14, "learning_rate": 4.977259290049739e-06, "logits/chosen": -2.568851947784424, "logits/rejected": -1.9942678213119507, "logps/chosen": -281.524658203125, "logps/rejected": -226.5505828857422, "loss": 0.6856, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.05417801812291145, "rewards/margins": 0.14544400572776794, "rewards/rejected": -0.0912659615278244, "step": 2120 }, { "epoch": 0.14, "learning_rate": 4.976484431074538e-06, "logits/chosen": -2.3134965896606445, "logits/rejected": -2.251204252243042, "logps/chosen": -191.16305541992188, "logps/rejected": -178.55044555664062, "loss": 0.692, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.026424426585435867, "rewards/margins": 0.04989726096391678, "rewards/rejected": -0.02347283437848091, "step": 2130 }, { "epoch": 0.14, "learning_rate": 4.975696653543425e-06, "logits/chosen": -2.399449586868286, "logits/rejected": -2.1439669132232666, "logps/chosen": -248.66311645507812, "logps/rejected": -245.24496459960938, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": 0.009857301600277424, "rewards/margins": 0.09151118248701096, "rewards/rejected": -0.08165387064218521, "step": 2140 }, { "epoch": 0.14, "learning_rate": 4.974895961565835e-06, "logits/chosen": -2.325570583343506, "logits/rejected": -1.9811022281646729, "logps/chosen": -174.66339111328125, "logps/rejected": -187.84197998046875, "loss": 0.6885, "rewards/accuracies": 0.625, "rewards/chosen": 0.005875427275896072, "rewards/margins": 0.07950626313686371, "rewards/rejected": -0.07363083958625793, "step": 2150 }, { "epoch": 0.14, "learning_rate": 4.974082359318566e-06, "logits/chosen": -2.320629596710205, "logits/rejected": -2.144028902053833, "logps/chosen": -252.28860473632812, "logps/rejected": -216.6935577392578, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.013614351861178875, "rewards/margins": 0.09884041547775269, "rewards/rejected": -0.08522607386112213, "step": 2160 }, { "epoch": 0.14, "learning_rate": 4.973255851045769e-06, "logits/chosen": -2.2949633598327637, "logits/rejected": -2.3036093711853027, "logps/chosen": -215.01193237304688, "logps/rejected": -185.1837158203125, "loss": 0.6907, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.023462986573576927, "rewards/margins": 0.06919287145137787, "rewards/rejected": -0.04572988301515579, "step": 2170 }, { "epoch": 0.14, "learning_rate": 4.972416441058915e-06, "logits/chosen": -2.2384517192840576, "logits/rejected": -2.0826570987701416, "logps/chosen": -227.3652801513672, "logps/rejected": -211.1637420654297, "loss": 0.6888, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.023905407637357712, "rewards/margins": 0.10440375655889511, "rewards/rejected": -0.0804983526468277, "step": 2180 }, { "epoch": 0.14, "learning_rate": 4.971564133736777e-06, "logits/chosen": -2.1866540908813477, "logits/rejected": -2.0094194412231445, "logps/chosen": -174.2550506591797, "logps/rejected": -188.44947814941406, "loss": 0.6874, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.037455908954143524, "rewards/margins": 0.09213308990001678, "rewards/rejected": -0.05467717722058296, "step": 2190 }, { "epoch": 0.14, "learning_rate": 4.970698933525409e-06, "logits/chosen": -2.478977918624878, "logits/rejected": -2.19539737701416, "logps/chosen": -281.2948303222656, "logps/rejected": -250.3472900390625, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.01947910524904728, "rewards/margins": 0.08724024891853333, "rewards/rejected": -0.10671935975551605, "step": 2200 }, { "epoch": 0.14, "eval_logits/chosen": -2.3566019535064697, "eval_logits/rejected": -2.166872262954712, "eval_logps/chosen": -234.11630249023438, "eval_logps/rejected": -221.84378051757812, "eval_loss": 0.690406084060669, "eval_rewards/accuracies": 0.6259999871253967, "eval_rewards/chosen": -0.021113485097885132, "eval_rewards/margins": 0.08120539039373398, "eval_rewards/rejected": -0.10231887549161911, "eval_runtime": 713.5703, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 2200 }, { "epoch": 0.14, "learning_rate": 4.969820844938118e-06, "logits/chosen": -2.4681389331817627, "logits/rejected": -2.141389846801758, "logps/chosen": -223.8988800048828, "logps/rejected": -179.49386596679688, "loss": 0.6895, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.012956234626471996, "rewards/margins": 0.0949764996767044, "rewards/rejected": -0.10793273150920868, "step": 2210 }, { "epoch": 0.15, "learning_rate": 4.968929872555444e-06, "logits/chosen": -1.9914073944091797, "logits/rejected": -2.178544521331787, "logps/chosen": -223.1538543701172, "logps/rejected": -262.5058898925781, "loss": 0.691, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06019466370344162, "rewards/margins": 0.03688964992761612, "rewards/rejected": -0.09708431363105774, "step": 2220 }, { "epoch": 0.15, "learning_rate": 4.968026021025137e-06, "logits/chosen": -2.4021904468536377, "logits/rejected": -2.184993267059326, "logps/chosen": -201.25662231445312, "logps/rejected": -168.76951599121094, "loss": 0.6889, "rewards/accuracies": 0.5, "rewards/chosen": 0.0030830535106360912, "rewards/margins": 0.07578281313180923, "rewards/rejected": -0.07269976288080215, "step": 2230 }, { "epoch": 0.15, "learning_rate": 4.967109295062128e-06, "logits/chosen": -2.2592692375183105, "logits/rejected": -2.035545825958252, "logps/chosen": -223.9074249267578, "logps/rejected": -253.28909301757812, "loss": 0.6921, "rewards/accuracies": 0.625, "rewards/chosen": 0.010940281674265862, "rewards/margins": 0.08460705727338791, "rewards/rejected": -0.0736667662858963, "step": 2240 }, { "epoch": 0.15, "learning_rate": 4.966179699448509e-06, "logits/chosen": -2.2413432598114014, "logits/rejected": -2.0473268032073975, "logps/chosen": -191.69692993164062, "logps/rejected": -176.70880126953125, "loss": 0.6916, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.009419666603207588, "rewards/margins": 0.02837999537587166, "rewards/rejected": -0.037799663841724396, "step": 2250 }, { "epoch": 0.15, "learning_rate": 4.965237239033506e-06, "logits/chosen": -2.410356044769287, "logits/rejected": -2.2265586853027344, "logps/chosen": -286.68280029296875, "logps/rejected": -257.287841796875, "loss": 0.6858, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.029759634286165237, "rewards/margins": 0.1538696587085724, "rewards/rejected": -0.12411002069711685, "step": 2260 }, { "epoch": 0.15, "learning_rate": 4.964281918733453e-06, "logits/chosen": -2.3516123294830322, "logits/rejected": -2.0975680351257324, "logps/chosen": -186.3314666748047, "logps/rejected": -193.1648406982422, "loss": 0.6882, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03217019513249397, "rewards/margins": 0.11290119588375092, "rewards/rejected": -0.14507140219211578, "step": 2270 }, { "epoch": 0.15, "learning_rate": 4.9633137435317715e-06, "logits/chosen": -2.367588758468628, "logits/rejected": -1.7204262018203735, "logps/chosen": -223.7427978515625, "logps/rejected": -165.84214782714844, "loss": 0.6881, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.008477909490466118, "rewards/margins": 0.09884487092494965, "rewards/rejected": -0.10732278972864151, "step": 2280 }, { "epoch": 0.15, "learning_rate": 4.9623327184789355e-06, "logits/chosen": -2.464026689529419, "logits/rejected": -2.3778140544891357, "logps/chosen": -216.4389190673828, "logps/rejected": -217.1889190673828, "loss": 0.6914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.013015327975153923, "rewards/margins": 0.055762697011232376, "rewards/rejected": -0.042747363448143005, "step": 2290 }, { "epoch": 0.15, "learning_rate": 4.9613388486924525e-06, "logits/chosen": -2.069906234741211, "logits/rejected": -2.2107555866241455, "logps/chosen": -184.16355895996094, "logps/rejected": -209.44485473632812, "loss": 0.6913, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.040295325219631195, "rewards/margins": 0.0790117159485817, "rewards/rejected": -0.11930704116821289, "step": 2300 }, { "epoch": 0.15, "eval_logits/chosen": -2.3594470024108887, "eval_logits/rejected": -2.1697957515716553, "eval_logps/chosen": -235.0392608642578, "eval_logps/rejected": -223.16970825195312, "eval_loss": 0.6907156705856323, "eval_rewards/accuracies": 0.6169999837875366, "eval_rewards/chosen": -0.03034323826432228, "eval_rewards/margins": 0.08523471653461456, "eval_rewards/rejected": -0.11557795852422714, "eval_runtime": 709.1569, "eval_samples_per_second": 2.82, "eval_steps_per_second": 1.41, "step": 2300 }, { "epoch": 0.15, "learning_rate": 4.960332139356834e-06, "logits/chosen": -2.311826705932617, "logits/rejected": -2.130279541015625, "logps/chosen": -211.80313110351562, "logps/rejected": -195.69424438476562, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.029862677678465843, "rewards/margins": 0.10331012308597565, "rewards/rejected": -0.13317279517650604, "step": 2310 }, { "epoch": 0.15, "learning_rate": 4.95931259572357e-06, "logits/chosen": -2.4080193042755127, "logits/rejected": -2.043299913406372, "logps/chosen": -235.7884521484375, "logps/rejected": -271.4584655761719, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.015484524890780449, "rewards/margins": 0.09996424615383148, "rewards/rejected": -0.11544877290725708, "step": 2320 }, { "epoch": 0.15, "learning_rate": 4.9582802231111e-06, "logits/chosen": -2.2260050773620605, "logits/rejected": -2.310255527496338, "logps/chosen": -211.4370574951172, "logps/rejected": -191.3184814453125, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": 0.004370751790702343, "rewards/margins": 0.06759954988956451, "rewards/rejected": -0.06322880834341049, "step": 2330 }, { "epoch": 0.15, "learning_rate": 4.957235026904782e-06, "logits/chosen": -2.393613815307617, "logits/rejected": -2.0691580772399902, "logps/chosen": -256.73382568359375, "logps/rejected": -211.1546173095703, "loss": 0.69, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.05418550223112106, "rewards/margins": 0.055359721183776855, "rewards/rejected": -0.001174215809442103, "step": 2340 }, { "epoch": 0.15, "learning_rate": 4.956177012556875e-06, "logits/chosen": -2.478972911834717, "logits/rejected": -2.2455832958221436, "logps/chosen": -243.0150604248047, "logps/rejected": -179.1586456298828, "loss": 0.6885, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.011777469888329506, "rewards/margins": 0.0771162286400795, "rewards/rejected": -0.06533874571323395, "step": 2350 }, { "epoch": 0.15, "learning_rate": 4.9551061855864976e-06, "logits/chosen": -2.150123119354248, "logits/rejected": -2.202948808670044, "logps/chosen": -191.73294067382812, "logps/rejected": -204.2236785888672, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.010951901786029339, "rewards/margins": 0.0828014463186264, "rewards/rejected": -0.07184954732656479, "step": 2360 }, { "epoch": 0.16, "learning_rate": 4.95402255157961e-06, "logits/chosen": -2.189168930053711, "logits/rejected": -2.2527401447296143, "logps/chosen": -182.8531494140625, "logps/rejected": -255.9742431640625, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.02203099988400936, "rewards/margins": 0.08574902266263962, "rewards/rejected": -0.0637180283665657, "step": 2370 }, { "epoch": 0.16, "learning_rate": 4.952926116188977e-06, "logits/chosen": -2.4717514514923096, "logits/rejected": -2.3997349739074707, "logps/chosen": -182.4977264404297, "logps/rejected": -227.0753173828125, "loss": 0.6932, "rewards/accuracies": 0.5, "rewards/chosen": 0.00879613310098648, "rewards/margins": 0.05373033136129379, "rewards/rejected": -0.04493419826030731, "step": 2380 }, { "epoch": 0.16, "learning_rate": 4.951816885134143e-06, "logits/chosen": -2.305720567703247, "logits/rejected": -2.2849538326263428, "logps/chosen": -200.81576538085938, "logps/rejected": -206.62277221679688, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.007371959276497364, "rewards/margins": 0.0666775330901146, "rewards/rejected": -0.059305571019649506, "step": 2390 }, { "epoch": 0.16, "learning_rate": 4.950694864201399e-06, "logits/chosen": -2.3164827823638916, "logits/rejected": -2.2348294258117676, "logps/chosen": -236.01382446289062, "logps/rejected": -246.91506958007812, "loss": 0.6899, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.026681452989578247, "rewards/margins": 0.0877201110124588, "rewards/rejected": -0.06103866174817085, "step": 2400 }, { "epoch": 0.16, "eval_logits/chosen": -2.334512710571289, "eval_logits/rejected": -2.1472020149230957, "eval_logps/chosen": -228.88552856445312, "eval_logps/rejected": -215.46128845214844, "eval_loss": 0.690426766872406, "eval_rewards/accuracies": 0.6225000023841858, "eval_rewards/chosen": 0.031194256618618965, "eval_rewards/margins": 0.06968805193901062, "eval_rewards/rejected": -0.038493797183036804, "eval_runtime": 713.4104, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 2400 }, { "epoch": 0.16, "learning_rate": 4.9495600592437575e-06, "logits/chosen": -2.3851048946380615, "logits/rejected": -2.2106268405914307, "logps/chosen": -228.5146484375, "logps/rejected": -235.97958374023438, "loss": 0.6918, "rewards/accuracies": 0.625, "rewards/chosen": -0.016201911494135857, "rewards/margins": 0.04283355548977852, "rewards/rejected": -0.059035468846559525, "step": 2410 }, { "epoch": 0.16, "learning_rate": 4.948412476180917e-06, "logits/chosen": -2.289257526397705, "logits/rejected": -2.005004405975342, "logps/chosen": -185.69271850585938, "logps/rejected": -171.775146484375, "loss": 0.6909, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.016168467700481415, "rewards/margins": 0.08819150179624557, "rewards/rejected": -0.10435996949672699, "step": 2420 }, { "epoch": 0.16, "learning_rate": 4.947252120999232e-06, "logits/chosen": -2.3191158771514893, "logits/rejected": -2.045474052429199, "logps/chosen": -267.0201721191406, "logps/rejected": -212.0325927734375, "loss": 0.6928, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0008467677980661392, "rewards/margins": 0.03786264732480049, "rewards/rejected": -0.037015873938798904, "step": 2430 }, { "epoch": 0.16, "learning_rate": 4.946078999751683e-06, "logits/chosen": -2.22875714302063, "logits/rejected": -2.160421371459961, "logps/chosen": -175.55667114257812, "logps/rejected": -161.97189331054688, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04070456326007843, "rewards/margins": 0.07971414923667908, "rewards/rejected": -0.039009593427181244, "step": 2440 }, { "epoch": 0.16, "learning_rate": 4.944893118557847e-06, "logits/chosen": -2.142076015472412, "logits/rejected": -2.10664439201355, "logps/chosen": -203.5096893310547, "logps/rejected": -157.3628692626953, "loss": 0.6901, "rewards/accuracies": 0.625, "rewards/chosen": 0.02974940836429596, "rewards/margins": 0.07543652504682541, "rewards/rejected": -0.04568710923194885, "step": 2450 }, { "epoch": 0.16, "learning_rate": 4.943694483603861e-06, "logits/chosen": -2.4463329315185547, "logits/rejected": -2.032438278198242, "logps/chosen": -222.4589385986328, "logps/rejected": -179.25404357910156, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": 0.035450879484415054, "rewards/margins": 0.077740877866745, "rewards/rejected": -0.04229000210762024, "step": 2460 }, { "epoch": 0.16, "learning_rate": 4.9424831011423914e-06, "logits/chosen": -2.408552408218384, "logits/rejected": -2.3169188499450684, "logps/chosen": -288.58917236328125, "logps/rejected": -253.8071746826172, "loss": 0.6926, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0016259975964203477, "rewards/margins": 0.02232646383345127, "rewards/rejected": -0.023952458053827286, "step": 2470 }, { "epoch": 0.16, "learning_rate": 4.9412589774926015e-06, "logits/chosen": -2.4122543334960938, "logits/rejected": -2.113245964050293, "logps/chosen": -273.45263671875, "logps/rejected": -232.2276611328125, "loss": 0.6908, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0073710111901164055, "rewards/margins": 0.09398536384105682, "rewards/rejected": -0.08661436289548874, "step": 2480 }, { "epoch": 0.16, "learning_rate": 4.940022119040121e-06, "logits/chosen": -2.465616226196289, "logits/rejected": -2.161163806915283, "logps/chosen": -287.33282470703125, "logps/rejected": -265.46343994140625, "loss": 0.6919, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.045831646770238876, "rewards/margins": 0.059488773345947266, "rewards/rejected": -0.013657125644385815, "step": 2490 }, { "epoch": 0.16, "learning_rate": 4.93877253223701e-06, "logits/chosen": -2.417496919631958, "logits/rejected": -2.174833297729492, "logps/chosen": -285.5562438964844, "logps/rejected": -259.3797302246094, "loss": 0.6924, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.06715986877679825, "rewards/margins": 0.053962014615535736, "rewards/rejected": 0.013197846710681915, "step": 2500 }, { "epoch": 0.16, "eval_logits/chosen": -2.3545589447021484, "eval_logits/rejected": -2.165816068649292, "eval_logps/chosen": -226.2342071533203, "eval_logps/rejected": -212.3520965576172, "eval_loss": 0.6904910802841187, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": 0.05770741403102875, "eval_rewards/margins": 0.06510914862155914, "eval_rewards/rejected": -0.007401737384498119, "eval_runtime": 713.7916, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 2500 }, { "epoch": 0.16, "learning_rate": 4.937510223601725e-06, "logits/chosen": -2.5731029510498047, "logits/rejected": -2.4025304317474365, "logps/chosen": -254.93716430664062, "logps/rejected": -217.3448944091797, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.07588844746351242, "rewards/margins": 0.04619182273745537, "rewards/rejected": 0.029696622863411903, "step": 2510 }, { "epoch": 0.16, "learning_rate": 4.936235199719085e-06, "logits/chosen": -2.3957393169403076, "logits/rejected": -2.2664778232574463, "logps/chosen": -164.94325256347656, "logps/rejected": -142.4560089111328, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.07358762621879578, "rewards/margins": 0.07691960036754608, "rewards/rejected": -0.0033319753129035234, "step": 2520 }, { "epoch": 0.17, "learning_rate": 4.93494746724024e-06, "logits/chosen": -2.384202480316162, "logits/rejected": -2.121302843093872, "logps/chosen": -218.81015014648438, "logps/rejected": -243.3566436767578, "loss": 0.6902, "rewards/accuracies": 0.75, "rewards/chosen": 0.0499432310461998, "rewards/margins": 0.07086510956287384, "rewards/rejected": -0.020921876654028893, "step": 2530 }, { "epoch": 0.17, "learning_rate": 4.933647032882635e-06, "logits/chosen": -2.5266172885894775, "logits/rejected": -2.211371898651123, "logps/chosen": -236.7266082763672, "logps/rejected": -201.0948486328125, "loss": 0.6897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0813523381948471, "rewards/margins": 0.08787768334150314, "rewards/rejected": -0.0065253423526883125, "step": 2540 }, { "epoch": 0.17, "learning_rate": 4.932333903429969e-06, "logits/chosen": -2.205667495727539, "logits/rejected": -2.058753728866577, "logps/chosen": -188.5623779296875, "logps/rejected": -162.65000915527344, "loss": 0.6944, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.045201074331998825, "rewards/margins": -0.001063268631696701, "rewards/rejected": 0.046264342963695526, "step": 2550 }, { "epoch": 0.17, "learning_rate": 4.931008085732172e-06, "logits/chosen": -2.3989763259887695, "logits/rejected": -1.9831393957138062, "logps/chosen": -197.83444213867188, "logps/rejected": -153.06373596191406, "loss": 0.692, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.04988477751612663, "rewards/margins": 0.05230678245425224, "rewards/rejected": -0.0024220068007707596, "step": 2560 }, { "epoch": 0.17, "learning_rate": 4.9296695867053565e-06, "logits/chosen": -2.357609510421753, "logits/rejected": -2.1423609256744385, "logps/chosen": -294.01385498046875, "logps/rejected": -234.3355712890625, "loss": 0.6911, "rewards/accuracies": 0.75, "rewards/chosen": 0.06587690114974976, "rewards/margins": 0.06281637400388718, "rewards/rejected": 0.00306052272208035, "step": 2570 }, { "epoch": 0.17, "learning_rate": 4.928318413331791e-06, "logits/chosen": -2.4487571716308594, "logits/rejected": -2.233754873275757, "logps/chosen": -205.7588653564453, "logps/rejected": -195.708251953125, "loss": 0.6916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.054682277143001556, "rewards/margins": 0.05934653431177139, "rewards/rejected": -0.004664266016334295, "step": 2580 }, { "epoch": 0.17, "learning_rate": 4.926954572659855e-06, "logits/chosen": -2.223047971725464, "logits/rejected": -2.236845016479492, "logps/chosen": -234.7522430419922, "logps/rejected": -261.69873046875, "loss": 0.6893, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07443410903215408, "rewards/margins": 0.09760646522045135, "rewards/rejected": -0.02317235991358757, "step": 2590 }, { "epoch": 0.17, "learning_rate": 4.925578071804013e-06, "logits/chosen": -2.245145320892334, "logits/rejected": -2.1898436546325684, "logps/chosen": -227.51473999023438, "logps/rejected": -292.41217041015625, "loss": 0.6893, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.021910278126597404, "rewards/margins": 0.07485561072826385, "rewards/rejected": -0.052945323288440704, "step": 2600 }, { "epoch": 0.17, "eval_logits/chosen": -2.3452558517456055, "eval_logits/rejected": -2.157046318054199, "eval_logps/chosen": -226.8026885986328, "eval_logps/rejected": -213.66268920898438, "eval_loss": 0.6903373003005981, "eval_rewards/accuracies": 0.6320000290870667, "eval_rewards/chosen": 0.05202279984951019, "eval_rewards/margins": 0.07253072410821915, "eval_rewards/rejected": -0.020507927983999252, "eval_runtime": 710.3899, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.408, "step": 2600 }, { "epoch": 0.17, "learning_rate": 4.924188917944763e-06, "logits/chosen": -2.412496328353882, "logits/rejected": -2.234360456466675, "logps/chosen": -214.3735809326172, "logps/rejected": -191.68753051757812, "loss": 0.6881, "rewards/accuracies": 0.75, "rewards/chosen": 0.05631525442004204, "rewards/margins": 0.10834388434886932, "rewards/rejected": -0.052028633654117584, "step": 2610 }, { "epoch": 0.17, "learning_rate": 4.922787118328617e-06, "logits/chosen": -2.432962656021118, "logits/rejected": -2.1155600547790527, "logps/chosen": -226.4817657470703, "logps/rejected": -155.4060516357422, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.03535914793610573, "rewards/margins": 0.052371758967638016, "rewards/rejected": -0.01701260730624199, "step": 2620 }, { "epoch": 0.17, "learning_rate": 4.921372680268045e-06, "logits/chosen": -2.411449909210205, "logits/rejected": -2.0770812034606934, "logps/chosen": -228.71273803710938, "logps/rejected": -201.04159545898438, "loss": 0.6936, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.012919628992676735, "rewards/margins": 0.026976149529218674, "rewards/rejected": -0.01405651867389679, "step": 2630 }, { "epoch": 0.17, "learning_rate": 4.919945611141451e-06, "logits/chosen": -2.459571361541748, "logits/rejected": -2.078706979751587, "logps/chosen": -218.890869140625, "logps/rejected": -165.82656860351562, "loss": 0.6891, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.06343764066696167, "rewards/margins": 0.05455031245946884, "rewards/rejected": 0.008887320756912231, "step": 2640 }, { "epoch": 0.17, "learning_rate": 4.918505918393125e-06, "logits/chosen": -2.321850538253784, "logits/rejected": -2.1715524196624756, "logps/chosen": -163.71751403808594, "logps/rejected": -194.63999938964844, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": 0.05313184857368469, "rewards/margins": 0.07549114525318146, "rewards/rejected": -0.022359298542141914, "step": 2650 }, { "epoch": 0.17, "learning_rate": 4.91705360953321e-06, "logits/chosen": -2.376260995864868, "logits/rejected": -2.14408540725708, "logps/chosen": -244.6078643798828, "logps/rejected": -220.7104034423828, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.008797365240752697, "rewards/margins": 0.08757869899272919, "rewards/rejected": -0.07878134399652481, "step": 2660 }, { "epoch": 0.17, "learning_rate": 4.9155886921376615e-06, "logits/chosen": -2.2897629737854004, "logits/rejected": -2.244919776916504, "logps/chosen": -208.5734405517578, "logps/rejected": -236.90902709960938, "loss": 0.6925, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.026050258427858353, "rewards/margins": 0.06667254120111465, "rewards/rejected": -0.0927228033542633, "step": 2670 }, { "epoch": 0.18, "learning_rate": 4.914111173848205e-06, "logits/chosen": -2.3279356956481934, "logits/rejected": -2.280183792114258, "logps/chosen": -237.3335418701172, "logps/rejected": -226.715576171875, "loss": 0.6918, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04301954060792923, "rewards/margins": 0.04799992963671684, "rewards/rejected": -0.09101946651935577, "step": 2680 }, { "epoch": 0.18, "learning_rate": 4.9126210623723e-06, "logits/chosen": -2.1070661544799805, "logits/rejected": -2.2766873836517334, "logps/chosen": -201.4837646484375, "logps/rejected": -244.10855102539062, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.011103630065917969, "rewards/margins": 0.08852796256542206, "rewards/rejected": -0.09963159263134003, "step": 2690 }, { "epoch": 0.18, "learning_rate": 4.911118365483098e-06, "logits/chosen": -2.2129688262939453, "logits/rejected": -2.2985918521881104, "logps/chosen": -202.52978515625, "logps/rejected": -220.18173217773438, "loss": 0.6901, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.007755742873996496, "rewards/margins": 0.08225713670253754, "rewards/rejected": -0.09001287072896957, "step": 2700 }, { "epoch": 0.18, "eval_logits/chosen": -2.3249430656433105, "eval_logits/rejected": -2.138172149658203, "eval_logps/chosen": -231.6273651123047, "eval_logps/rejected": -217.83657836914062, "eval_loss": 0.6905510425567627, "eval_rewards/accuracies": 0.6324999928474426, "eval_rewards/chosen": 0.003775849472731352, "eval_rewards/margins": 0.06602264940738678, "eval_rewards/rejected": -0.06224680691957474, "eval_runtime": 712.2335, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 2700 }, { "epoch": 0.18, "learning_rate": 4.909603091019403e-06, "logits/chosen": -2.4967615604400635, "logits/rejected": -2.1264119148254395, "logps/chosen": -232.87014770507812, "logps/rejected": -201.87669372558594, "loss": 0.6922, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03507193177938461, "rewards/margins": 0.05626847222447395, "rewards/rejected": -0.021196534857153893, "step": 2710 }, { "epoch": 0.18, "learning_rate": 4.908075246885626e-06, "logits/chosen": -2.2765352725982666, "logits/rejected": -2.204392910003662, "logps/chosen": -149.35107421875, "logps/rejected": -125.10282135009766, "loss": 0.6928, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.01376400887966156, "rewards/margins": 0.03143042325973511, "rewards/rejected": -0.017666416242718697, "step": 2720 }, { "epoch": 0.18, "learning_rate": 4.906534841051755e-06, "logits/chosen": -2.138417959213257, "logits/rejected": -2.202500820159912, "logps/chosen": -241.3466339111328, "logps/rejected": -257.94903564453125, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": 0.02056839130818844, "rewards/margins": 0.055667709559202194, "rewards/rejected": -0.03509931638836861, "step": 2730 }, { "epoch": 0.18, "learning_rate": 4.904981881553297e-06, "logits/chosen": -2.4024128913879395, "logits/rejected": -2.0706582069396973, "logps/chosen": -223.4213409423828, "logps/rejected": -164.6261444091797, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": 0.007749582640826702, "rewards/margins": 0.05322981998324394, "rewards/rejected": -0.04548024386167526, "step": 2740 }, { "epoch": 0.18, "learning_rate": 4.903416376491252e-06, "logits/chosen": -2.4039175510406494, "logits/rejected": -2.008875608444214, "logps/chosen": -277.7300720214844, "logps/rejected": -245.2926788330078, "loss": 0.6898, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005621733609586954, "rewards/margins": 0.08154728263616562, "rewards/rejected": -0.0759255513548851, "step": 2750 }, { "epoch": 0.18, "learning_rate": 4.90183833403206e-06, "logits/chosen": -2.467904567718506, "logits/rejected": -2.3105967044830322, "logps/chosen": -261.31292724609375, "logps/rejected": -234.25772094726562, "loss": 0.6895, "rewards/accuracies": 0.625, "rewards/chosen": 0.029001805931329727, "rewards/margins": 0.09197796136140823, "rewards/rejected": -0.0629761591553688, "step": 2760 }, { "epoch": 0.18, "learning_rate": 4.900247762407564e-06, "logits/chosen": -2.228250026702881, "logits/rejected": -2.0147242546081543, "logps/chosen": -173.1147003173828, "logps/rejected": -203.9661407470703, "loss": 0.6878, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.008227216079831123, "rewards/margins": 0.08808780461549759, "rewards/rejected": -0.09631501138210297, "step": 2770 }, { "epoch": 0.18, "learning_rate": 4.898644669914965e-06, "logits/chosen": -2.2914719581604004, "logits/rejected": -2.2036476135253906, "logps/chosen": -222.91055297851562, "logps/rejected": -216.57778930664062, "loss": 0.6908, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.01161886565387249, "rewards/margins": 0.07493311911821365, "rewards/rejected": -0.06331426650285721, "step": 2780 }, { "epoch": 0.18, "learning_rate": 4.897029064916778e-06, "logits/chosen": -2.0988786220550537, "logits/rejected": -1.9013208150863647, "logps/chosen": -208.904541015625, "logps/rejected": -201.63589477539062, "loss": 0.6919, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.007546191103756428, "rewards/margins": 0.051201723515987396, "rewards/rejected": -0.058747924864292145, "step": 2790 }, { "epoch": 0.18, "learning_rate": 4.895400955840791e-06, "logits/chosen": -2.459437847137451, "logits/rejected": -1.807782769203186, "logps/chosen": -215.23831176757812, "logps/rejected": -182.17108154296875, "loss": 0.6909, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.05112996697425842, "rewards/margins": 0.08630537241697311, "rewards/rejected": -0.035175397992134094, "step": 2800 }, { "epoch": 0.18, "eval_logits/chosen": -2.3020472526550293, "eval_logits/rejected": -2.1165366172790527, "eval_logps/chosen": -228.67945861816406, "eval_logps/rejected": -215.24508666992188, "eval_loss": 0.6903337240219116, "eval_rewards/accuracies": 0.6315000057220459, "eval_rewards/chosen": 0.03325507417321205, "eval_rewards/margins": 0.0695870891213417, "eval_rewards/rejected": -0.03633202239871025, "eval_runtime": 710.6994, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 2800 }, { "epoch": 0.18, "learning_rate": 4.893760351180018e-06, "logits/chosen": -2.3032994270324707, "logits/rejected": -2.2417054176330566, "logps/chosen": -195.91603088378906, "logps/rejected": -206.2035675048828, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.026252543553709984, "rewards/margins": 0.05186532065272331, "rewards/rejected": -0.02561277151107788, "step": 2810 }, { "epoch": 0.18, "learning_rate": 4.892107259492657e-06, "logits/chosen": -2.2603695392608643, "logits/rejected": -2.022012233734131, "logps/chosen": -237.22506713867188, "logps/rejected": -246.58621215820312, "loss": 0.6919, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.026342108845710754, "rewards/margins": 0.03342199698090553, "rewards/rejected": -0.0070798941887915134, "step": 2820 }, { "epoch": 0.19, "learning_rate": 4.890441689402042e-06, "logits/chosen": -2.3950798511505127, "logits/rejected": -2.2143449783325195, "logps/chosen": -331.57049560546875, "logps/rejected": -291.9052734375, "loss": 0.6898, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03934413939714432, "rewards/margins": 0.08334746211767197, "rewards/rejected": -0.04400331899523735, "step": 2830 }, { "epoch": 0.19, "learning_rate": 4.888763649596606e-06, "logits/chosen": -2.402956485748291, "logits/rejected": -2.176975965499878, "logps/chosen": -208.06912231445312, "logps/rejected": -206.9716339111328, "loss": 0.69, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.020925721153616905, "rewards/margins": 0.07703931629657745, "rewards/rejected": -0.056113600730895996, "step": 2840 }, { "epoch": 0.19, "learning_rate": 4.887073148829824e-06, "logits/chosen": -2.3179640769958496, "logits/rejected": -2.170621871948242, "logps/chosen": -262.16778564453125, "logps/rejected": -246.90365600585938, "loss": 0.69, "rewards/accuracies": 0.625, "rewards/chosen": 0.04735777527093887, "rewards/margins": 0.07671411335468292, "rewards/rejected": -0.029356345534324646, "step": 2850 }, { "epoch": 0.19, "learning_rate": 4.885370195920177e-06, "logits/chosen": -2.199707508087158, "logits/rejected": -2.139965057373047, "logps/chosen": -185.32675170898438, "logps/rejected": -182.80758666992188, "loss": 0.6918, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.044344521185849e-05, "rewards/margins": 0.060539864003658295, "rewards/rejected": -0.060570307075977325, "step": 2860 }, { "epoch": 0.19, "learning_rate": 4.883654799751101e-06, "logits/chosen": -2.105257749557495, "logits/rejected": -2.3305790424346924, "logps/chosen": -212.72607421875, "logps/rejected": -253.13320922851562, "loss": 0.6914, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.029830992221832275, "rewards/margins": 0.056669920682907104, "rewards/rejected": -0.026838932186365128, "step": 2870 }, { "epoch": 0.19, "learning_rate": 4.8819269692709435e-06, "logits/chosen": -2.4003872871398926, "logits/rejected": -2.227214813232422, "logps/chosen": -263.90191650390625, "logps/rejected": -199.56661987304688, "loss": 0.6899, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03144312649965286, "rewards/margins": 0.08026852458715439, "rewards/rejected": -0.048825401812791824, "step": 2880 }, { "epoch": 0.19, "learning_rate": 4.880186713492915e-06, "logits/chosen": -2.2545135021209717, "logits/rejected": -2.066854238510132, "logps/chosen": -227.1837921142578, "logps/rejected": -178.45132446289062, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": 0.005335694644600153, "rewards/margins": 0.04889502376317978, "rewards/rejected": -0.043559327721595764, "step": 2890 }, { "epoch": 0.19, "learning_rate": 4.878434041495041e-06, "logits/chosen": -2.2972848415374756, "logits/rejected": -2.416718006134033, "logps/chosen": -233.84445190429688, "logps/rejected": -244.32199096679688, "loss": 0.6893, "rewards/accuracies": 0.75, "rewards/chosen": 0.033212922513484955, "rewards/margins": 0.08615333586931229, "rewards/rejected": -0.05294041708111763, "step": 2900 }, { "epoch": 0.19, "eval_logits/chosen": -2.296010971069336, "eval_logits/rejected": -2.1109728813171387, "eval_logps/chosen": -230.9010009765625, "eval_logps/rejected": -217.73272705078125, "eval_loss": 0.6902217268943787, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": 0.011039442382752895, "eval_rewards/margins": 0.07224779576063156, "eval_rewards/rejected": -0.06120835244655609, "eval_runtime": 712.9014, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 2900 }, { "epoch": 0.19, "learning_rate": 4.876668962420117e-06, "logits/chosen": -2.2779107093811035, "logits/rejected": -2.0205576419830322, "logps/chosen": -285.90301513671875, "logps/rejected": -234.1432342529297, "loss": 0.6918, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0374809131026268, "rewards/margins": 0.07856379449367523, "rewards/rejected": -0.04108288139104843, "step": 2910 }, { "epoch": 0.19, "learning_rate": 4.87489148547566e-06, "logits/chosen": -2.2894115447998047, "logits/rejected": -2.142322540283203, "logps/chosen": -261.393310546875, "logps/rejected": -233.22802734375, "loss": 0.6929, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.015348220244050026, "rewards/margins": 0.050076454877853394, "rewards/rejected": -0.06542467325925827, "step": 2920 }, { "epoch": 0.19, "learning_rate": 4.873101619933862e-06, "logits/chosen": -2.5051543712615967, "logits/rejected": -2.1486945152282715, "logps/chosen": -263.4872131347656, "logps/rejected": -221.30996704101562, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.010399745777249336, "rewards/margins": 0.07138345390558243, "rewards/rejected": -0.06098370626568794, "step": 2930 }, { "epoch": 0.19, "learning_rate": 4.8712993751315385e-06, "logits/chosen": -2.283648729324341, "logits/rejected": -2.2247979640960693, "logps/chosen": -120.84417724609375, "logps/rejected": -128.2853240966797, "loss": 0.6913, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.001862399629317224, "rewards/margins": 0.03539283573627472, "rewards/rejected": -0.03725523501634598, "step": 2940 }, { "epoch": 0.19, "learning_rate": 4.869484760470079e-06, "logits/chosen": -2.3379874229431152, "logits/rejected": -2.1309611797332764, "logps/chosen": -187.4462127685547, "logps/rejected": -158.68853759765625, "loss": 0.6884, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.012146204710006714, "rewards/margins": 0.07541505247354507, "rewards/rejected": -0.06326885521411896, "step": 2950 }, { "epoch": 0.19, "learning_rate": 4.867657785415404e-06, "logits/chosen": -2.2649407386779785, "logits/rejected": -1.9867734909057617, "logps/chosen": -250.11520385742188, "logps/rejected": -217.9238739013672, "loss": 0.6893, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.010161913931369781, "rewards/margins": 0.09553287923336029, "rewards/rejected": -0.10569479316473007, "step": 2960 }, { "epoch": 0.19, "learning_rate": 4.865818459497911e-06, "logits/chosen": -2.49599027633667, "logits/rejected": -2.0337436199188232, "logps/chosen": -284.77001953125, "logps/rejected": -206.2366180419922, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.00591338612139225, "rewards/margins": 0.0626380667090416, "rewards/rejected": -0.0685514584183693, "step": 2970 }, { "epoch": 0.19, "learning_rate": 4.863966792312423e-06, "logits/chosen": -2.382023811340332, "logits/rejected": -2.142746686935425, "logps/chosen": -239.0797576904297, "logps/rejected": -208.9804229736328, "loss": 0.6889, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.023791249841451645, "rewards/margins": 0.10952029377222061, "rewards/rejected": -0.08572904765605927, "step": 2980 }, { "epoch": 0.2, "learning_rate": 4.862102793518145e-06, "logits/chosen": -2.2269492149353027, "logits/rejected": -2.290496349334717, "logps/chosen": -194.52423095703125, "logps/rejected": -209.1487274169922, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0024234852753579617, "rewards/margins": 0.07699505239725113, "rewards/rejected": -0.07941852509975433, "step": 2990 }, { "epoch": 0.2, "learning_rate": 4.8602264728386075e-06, "logits/chosen": -2.3325839042663574, "logits/rejected": -2.184682607650757, "logps/chosen": -252.10238647460938, "logps/rejected": -256.3367919921875, "loss": 0.6925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.00397127028554678, "rewards/margins": 0.07653030008077621, "rewards/rejected": -0.07255902886390686, "step": 3000 }, { "epoch": 0.2, "eval_logits/chosen": -2.3181509971618652, "eval_logits/rejected": -2.131211280822754, "eval_logps/chosen": -230.46095275878906, "eval_logps/rejected": -218.1745147705078, "eval_loss": 0.6903403401374817, "eval_rewards/accuracies": 0.6244999766349792, "eval_rewards/chosen": 0.015439935959875584, "eval_rewards/margins": 0.08106595277786255, "eval_rewards/rejected": -0.06562602519989014, "eval_runtime": 709.3669, "eval_samples_per_second": 2.819, "eval_steps_per_second": 1.41, "step": 3000 }, { "epoch": 0.2, "learning_rate": 4.858337840061616e-06, "logits/chosen": -2.309683084487915, "logits/rejected": -2.230560779571533, "logps/chosen": -180.07546997070312, "logps/rejected": -241.62252807617188, "loss": 0.6908, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.030049040913581848, "rewards/margins": 0.07897917181253433, "rewards/rejected": -0.048930130898952484, "step": 3010 }, { "epoch": 0.2, "learning_rate": 4.856436905039208e-06, "logits/chosen": -2.3335537910461426, "logits/rejected": -2.174056053161621, "logps/chosen": -207.0081024169922, "logps/rejected": -181.50457763671875, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04196963459253311, "rewards/margins": 0.08883820474147797, "rewards/rejected": -0.046868570148944855, "step": 3020 }, { "epoch": 0.2, "learning_rate": 4.854523677687588e-06, "logits/chosen": -2.2027428150177, "logits/rejected": -2.271785259246826, "logps/chosen": -176.38583374023438, "logps/rejected": -201.62588500976562, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": 0.048596903681755066, "rewards/margins": 0.06843477487564087, "rewards/rejected": -0.0198378786444664, "step": 3030 }, { "epoch": 0.2, "learning_rate": 4.85259816798709e-06, "logits/chosen": -2.4263253211975098, "logits/rejected": -1.8797962665557861, "logps/chosen": -281.78717041015625, "logps/rejected": -212.32394409179688, "loss": 0.6909, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.059749238193035126, "rewards/margins": 0.11092700809240341, "rewards/rejected": -0.05117777734994888, "step": 3040 }, { "epoch": 0.2, "learning_rate": 4.850660385982114e-06, "logits/chosen": -2.4107866287231445, "logits/rejected": -2.2419321537017822, "logps/chosen": -243.84481811523438, "logps/rejected": -195.49806213378906, "loss": 0.6884, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0387430340051651, "rewards/margins": 0.06934549659490585, "rewards/rejected": -0.030602458864450455, "step": 3050 }, { "epoch": 0.2, "learning_rate": 4.848710341781081e-06, "logits/chosen": -2.152615547180176, "logits/rejected": -2.259021043777466, "logps/chosen": -176.58517456054688, "logps/rejected": -175.9096221923828, "loss": 0.6923, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.026836439967155457, "rewards/margins": 0.050511687994003296, "rewards/rejected": -0.07734812796115875, "step": 3060 }, { "epoch": 0.2, "learning_rate": 4.846748045556377e-06, "logits/chosen": -2.3441312313079834, "logits/rejected": -2.0264244079589844, "logps/chosen": -239.79904174804688, "logps/rejected": -186.3507537841797, "loss": 0.6912, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.003119309199973941, "rewards/margins": 0.07062678039073944, "rewards/rejected": -0.07374609261751175, "step": 3070 }, { "epoch": 0.2, "learning_rate": 4.8447735075442995e-06, "logits/chosen": -2.2217564582824707, "logits/rejected": -2.284585952758789, "logps/chosen": -201.0135498046875, "logps/rejected": -206.20608520507812, "loss": 0.6903, "rewards/accuracies": 0.625, "rewards/chosen": -0.03451332077383995, "rewards/margins": 0.08593029528856277, "rewards/rejected": -0.12044362723827362, "step": 3080 }, { "epoch": 0.2, "learning_rate": 4.8427867380450075e-06, "logits/chosen": -2.406268835067749, "logits/rejected": -2.0118308067321777, "logps/chosen": -234.0061798095703, "logps/rejected": -197.52023315429688, "loss": 0.6894, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.036669012159109116, "rewards/margins": 0.09763816744089127, "rewards/rejected": -0.13430717587471008, "step": 3090 }, { "epoch": 0.2, "learning_rate": 4.840787747422462e-06, "logits/chosen": -2.3698010444641113, "logits/rejected": -2.114318609237671, "logps/chosen": -199.02552795410156, "logps/rejected": -175.43988037109375, "loss": 0.692, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.033694010227918625, "rewards/margins": 0.06671958416700363, "rewards/rejected": -0.10041359812021255, "step": 3100 }, { "epoch": 0.2, "eval_logits/chosen": -2.3160300254821777, "eval_logits/rejected": -2.129782199859619, "eval_logps/chosen": -235.4629669189453, "eval_logps/rejected": -223.5566864013672, "eval_loss": 0.6903056502342224, "eval_rewards/accuracies": 0.6439999938011169, "eval_rewards/chosen": -0.034580256789922714, "eval_rewards/margins": 0.08486771583557129, "eval_rewards/rejected": -0.1194479689002037, "eval_runtime": 712.862, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 3100 }, { "epoch": 0.2, "learning_rate": 4.838776546104378e-06, "logits/chosen": -2.2874035835266113, "logits/rejected": -2.2883827686309814, "logps/chosen": -282.4599609375, "logps/rejected": -252.1973876953125, "loss": 0.6882, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.015067142434418201, "rewards/margins": 0.10139371454715729, "rewards/rejected": -0.11646085977554321, "step": 3110 }, { "epoch": 0.2, "learning_rate": 4.836753144582168e-06, "logits/chosen": -2.2503442764282227, "logits/rejected": -2.0378506183624268, "logps/chosen": -245.8728790283203, "logps/rejected": -234.16201782226562, "loss": 0.6882, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03641275316476822, "rewards/margins": 0.12401758134365082, "rewards/rejected": -0.16043034195899963, "step": 3120 }, { "epoch": 0.2, "learning_rate": 4.834717553410884e-06, "logits/chosen": -2.3153603076934814, "logits/rejected": -2.077373743057251, "logps/chosen": -190.7818145751953, "logps/rejected": -213.3385009765625, "loss": 0.6909, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.015058299526572227, "rewards/margins": 0.08887463808059692, "rewards/rejected": -0.1039329394698143, "step": 3130 }, { "epoch": 0.21, "learning_rate": 4.832669783209167e-06, "logits/chosen": -2.192064046859741, "logits/rejected": -2.241379499435425, "logps/chosen": -245.5317840576172, "logps/rejected": -248.6998291015625, "loss": 0.6933, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.011057475581765175, "rewards/margins": 0.026191571727395058, "rewards/rejected": -0.03724905103445053, "step": 3140 }, { "epoch": 0.21, "learning_rate": 4.8306098446591895e-06, "logits/chosen": -1.8934345245361328, "logits/rejected": -2.0603950023651123, "logps/chosen": -177.72357177734375, "logps/rejected": -208.8456573486328, "loss": 0.6898, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.014547166414558887, "rewards/margins": 0.06190754845738411, "rewards/rejected": -0.07645471394062042, "step": 3150 }, { "epoch": 0.21, "learning_rate": 4.828537748506601e-06, "logits/chosen": -2.411770820617676, "logits/rejected": -2.1453702449798584, "logps/chosen": -273.4091796875, "logps/rejected": -221.22647094726562, "loss": 0.6925, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.00010936595208477229, "rewards/margins": 0.04063498228788376, "rewards/rejected": -0.040744349360466, "step": 3160 }, { "epoch": 0.21, "learning_rate": 4.826453505560469e-06, "logits/chosen": -2.0967440605163574, "logits/rejected": -2.0667638778686523, "logps/chosen": -192.63839721679688, "logps/rejected": -184.79087829589844, "loss": 0.6903, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.016592925414443016, "rewards/margins": 0.05849025771021843, "rewards/rejected": -0.07508319616317749, "step": 3170 }, { "epoch": 0.21, "learning_rate": 4.824357126693226e-06, "logits/chosen": -2.206259250640869, "logits/rejected": -1.8263355493545532, "logps/chosen": -260.7179260253906, "logps/rejected": -217.93618774414062, "loss": 0.6922, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.01974749192595482, "rewards/margins": 0.05945660546422005, "rewards/rejected": -0.07920410484075546, "step": 3180 }, { "epoch": 0.21, "learning_rate": 4.8222486228406105e-06, "logits/chosen": -2.3845696449279785, "logits/rejected": -2.098829746246338, "logps/chosen": -211.3022003173828, "logps/rejected": -183.42645263671875, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": -0.0006182378274388611, "rewards/margins": 0.06805343925952911, "rewards/rejected": -0.06867166608572006, "step": 3190 }, { "epoch": 0.21, "learning_rate": 4.820128005001612e-06, "logits/chosen": -2.0764338970184326, "logits/rejected": -1.9985427856445312, "logps/chosen": -217.34097290039062, "logps/rejected": -208.50668334960938, "loss": 0.687, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.016526032239198685, "rewards/margins": 0.12708571553230286, "rewards/rejected": -0.11055967956781387, "step": 3200 }, { "epoch": 0.21, "eval_logits/chosen": -2.3211934566497803, "eval_logits/rejected": -2.1343932151794434, "eval_logps/chosen": -233.4681854248047, "eval_logps/rejected": -220.65005493164062, "eval_loss": 0.6902753114700317, "eval_rewards/accuracies": 0.6209999918937683, "eval_rewards/chosen": -0.014632347039878368, "eval_rewards/margins": 0.07574935257434845, "eval_rewards/rejected": -0.0903816968202591, "eval_runtime": 710.7834, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 3200 }, { "epoch": 0.21, "learning_rate": 4.817995284238412e-06, "logits/chosen": -2.1152288913726807, "logits/rejected": -2.1994452476501465, "logps/chosen": -198.7126007080078, "logps/rejected": -236.6934356689453, "loss": 0.6888, "rewards/accuracies": 0.625, "rewards/chosen": -0.021882567554712296, "rewards/margins": 0.10495994985103607, "rewards/rejected": -0.12684252858161926, "step": 3210 }, { "epoch": 0.21, "learning_rate": 4.815850471676327e-06, "logits/chosen": -2.2534170150756836, "logits/rejected": -2.134138822555542, "logps/chosen": -238.7698211669922, "logps/rejected": -244.8793487548828, "loss": 0.6888, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.003558219876140356, "rewards/margins": 0.10356787592172623, "rewards/rejected": -0.10712607949972153, "step": 3220 }, { "epoch": 0.21, "learning_rate": 4.813693578503751e-06, "logits/chosen": -2.303338050842285, "logits/rejected": -2.123116970062256, "logps/chosen": -295.8538513183594, "logps/rejected": -247.59762573242188, "loss": 0.6913, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0065292296931147575, "rewards/margins": 0.072014220058918, "rewards/rejected": -0.06548498570919037, "step": 3230 }, { "epoch": 0.21, "learning_rate": 4.811524615972093e-06, "logits/chosen": -2.3142409324645996, "logits/rejected": -2.1741249561309814, "logps/chosen": -230.9072265625, "logps/rejected": -246.0470733642578, "loss": 0.6899, "rewards/accuracies": 0.5, "rewards/chosen": -0.009716503322124481, "rewards/margins": 0.06937507539987564, "rewards/rejected": -0.07909159362316132, "step": 3240 }, { "epoch": 0.21, "learning_rate": 4.809343595395724e-06, "logits/chosen": -2.51108455657959, "logits/rejected": -2.3194468021392822, "logps/chosen": -191.16067504882812, "logps/rejected": -164.18856811523438, "loss": 0.6909, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.021050242707133293, "rewards/margins": 0.03616604954004288, "rewards/rejected": -0.05721629410982132, "step": 3250 }, { "epoch": 0.21, "learning_rate": 4.807150528151918e-06, "logits/chosen": -2.336385726928711, "logits/rejected": -2.1668715476989746, "logps/chosen": -166.63986206054688, "logps/rejected": -193.17758178710938, "loss": 0.6889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0005269769462756813, "rewards/margins": 0.0867081880569458, "rewards/rejected": -0.08618120849132538, "step": 3260 }, { "epoch": 0.21, "learning_rate": 4.804945425680787e-06, "logits/chosen": -2.288424253463745, "logits/rejected": -2.3639869689941406, "logps/chosen": -190.94607543945312, "logps/rejected": -174.963134765625, "loss": 0.6925, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.025028562173247337, "rewards/margins": 0.04095301777124405, "rewards/rejected": -0.06598157435655594, "step": 3270 }, { "epoch": 0.21, "learning_rate": 4.802728299485225e-06, "logits/chosen": -2.146742343902588, "logits/rejected": -2.1346192359924316, "logps/chosen": -153.60440063476562, "logps/rejected": -180.19187927246094, "loss": 0.6901, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.02999986708164215, "rewards/margins": 0.050558023154735565, "rewards/rejected": -0.08055789768695831, "step": 3280 }, { "epoch": 0.22, "learning_rate": 4.8004991611308495e-06, "logits/chosen": -2.4337520599365234, "logits/rejected": -2.1402642726898193, "logps/chosen": -240.08847045898438, "logps/rejected": -231.9585418701172, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.014652663841843605, "rewards/margins": 0.07920269668102264, "rewards/rejected": -0.06455003470182419, "step": 3290 }, { "epoch": 0.22, "learning_rate": 4.798258022245937e-06, "logits/chosen": -2.3729190826416016, "logits/rejected": -1.9661096334457397, "logps/chosen": -218.466064453125, "logps/rejected": -187.5246124267578, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.00859595276415348, "rewards/margins": 0.07523629814386368, "rewards/rejected": -0.08383224904537201, "step": 3300 }, { "epoch": 0.22, "eval_logits/chosen": -2.309415817260742, "eval_logits/rejected": -2.1233413219451904, "eval_logps/chosen": -232.6118621826172, "eval_logps/rejected": -220.64340209960938, "eval_loss": 0.6902174353599548, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -0.006068930495530367, "eval_rewards/margins": 0.08424630761146545, "eval_rewards/rejected": -0.09031523764133453, "eval_runtime": 712.8632, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 3300 }, { "epoch": 0.22, "learning_rate": 4.796004894521365e-06, "logits/chosen": -2.3003628253936768, "logits/rejected": -2.1167216300964355, "logps/chosen": -230.6715850830078, "logps/rejected": -260.5556640625, "loss": 0.6903, "rewards/accuracies": 0.5, "rewards/chosen": -0.021018046885728836, "rewards/margins": 0.08627601712942123, "rewards/rejected": -0.10729406774044037, "step": 3310 }, { "epoch": 0.22, "learning_rate": 4.7937397897105545e-06, "logits/chosen": -2.290663242340088, "logits/rejected": -2.2099320888519287, "logps/chosen": -203.26271057128906, "logps/rejected": -182.44137573242188, "loss": 0.6928, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.019701208919286728, "rewards/margins": 0.043921031057834625, "rewards/rejected": -0.024219822138547897, "step": 3320 }, { "epoch": 0.22, "learning_rate": 4.791462719629399e-06, "logits/chosen": -2.3039164543151855, "logits/rejected": -2.1613926887512207, "logps/chosen": -183.3705596923828, "logps/rejected": -171.16586303710938, "loss": 0.6889, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.023545963689684868, "rewards/margins": 0.10206764936447144, "rewards/rejected": -0.07852168381214142, "step": 3330 }, { "epoch": 0.22, "learning_rate": 4.789173696156212e-06, "logits/chosen": -2.320606231689453, "logits/rejected": -1.9661529064178467, "logps/chosen": -271.17156982421875, "logps/rejected": -267.260009765625, "loss": 0.6869, "rewards/accuracies": 0.75, "rewards/chosen": 0.05269026756286621, "rewards/margins": 0.14737890660762787, "rewards/rejected": -0.09468863904476166, "step": 3340 }, { "epoch": 0.22, "learning_rate": 4.786872731231662e-06, "logits/chosen": -2.3447282314300537, "logits/rejected": -2.2217040061950684, "logps/chosen": -214.87109375, "logps/rejected": -212.9879608154297, "loss": 0.6912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02019861713051796, "rewards/margins": 0.09485939145088196, "rewards/rejected": -0.0746607705950737, "step": 3350 }, { "epoch": 0.22, "learning_rate": 4.784559836858709e-06, "logits/chosen": -2.318398952484131, "logits/rejected": -1.8477122783660889, "logps/chosen": -234.3660888671875, "logps/rejected": -210.38357543945312, "loss": 0.6908, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.004489635583013296, "rewards/margins": 0.06438425183296204, "rewards/rejected": -0.06887389719486237, "step": 3360 }, { "epoch": 0.22, "learning_rate": 4.782235025102542e-06, "logits/chosen": -2.312790632247925, "logits/rejected": -2.242957830429077, "logps/chosen": -228.43661499023438, "logps/rejected": -220.25283813476562, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.021173015236854553, "rewards/margins": 0.092967689037323, "rewards/rejected": -0.11414071172475815, "step": 3370 }, { "epoch": 0.22, "learning_rate": 4.779898308090519e-06, "logits/chosen": -2.2664966583251953, "logits/rejected": -2.0547492504119873, "logps/chosen": -278.0267333984375, "logps/rejected": -248.862548828125, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03349475562572479, "rewards/margins": 0.07666581869125366, "rewards/rejected": -0.11016058921813965, "step": 3380 }, { "epoch": 0.22, "learning_rate": 4.777549698012101e-06, "logits/chosen": -2.216127395629883, "logits/rejected": -2.0575273036956787, "logps/chosen": -244.1732177734375, "logps/rejected": -233.697265625, "loss": 0.6902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.009566160850226879, "rewards/margins": 0.08683688193559647, "rewards/rejected": -0.09640304744243622, "step": 3390 }, { "epoch": 0.22, "learning_rate": 4.775189207118787e-06, "logits/chosen": -2.2499351501464844, "logits/rejected": -2.054161548614502, "logps/chosen": -271.6486511230469, "logps/rejected": -258.4850158691406, "loss": 0.6908, "rewards/accuracies": 0.75, "rewards/chosen": 0.0016902908682823181, "rewards/margins": 0.09174026548862457, "rewards/rejected": -0.09004998207092285, "step": 3400 }, { "epoch": 0.22, "eval_logits/chosen": -2.3067705631256104, "eval_logits/rejected": -2.121011734008789, "eval_logps/chosen": -233.02999877929688, "eval_logps/rejected": -220.44912719726562, "eval_loss": 0.6904054880142212, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.010250742547214031, "eval_rewards/margins": 0.07812146842479706, "eval_rewards/rejected": -0.08837221562862396, "eval_runtime": 711.3718, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 3400 }, { "epoch": 0.22, "learning_rate": 4.772816847724054e-06, "logits/chosen": -2.405539035797119, "logits/rejected": -2.133857011795044, "logps/chosen": -224.6778106689453, "logps/rejected": -227.91207885742188, "loss": 0.6923, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.021232225000858307, "rewards/margins": 0.03736606985330582, "rewards/rejected": -0.058598291128873825, "step": 3410 }, { "epoch": 0.22, "learning_rate": 4.770432632203294e-06, "logits/chosen": -2.139242649078369, "logits/rejected": -2.053284168243408, "logps/chosen": -248.99319458007812, "logps/rejected": -202.07363891601562, "loss": 0.6925, "rewards/accuracies": 0.5, "rewards/chosen": -0.053460635244846344, "rewards/margins": 0.03398740664124489, "rewards/rejected": -0.08744804561138153, "step": 3420 }, { "epoch": 0.22, "learning_rate": 4.768036572993738e-06, "logits/chosen": -2.1945345401763916, "logits/rejected": -2.288442611694336, "logps/chosen": -285.4275207519531, "logps/rejected": -275.6911315917969, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.054085589945316315, "rewards/margins": 0.05863531306385994, "rewards/rejected": -0.11272089183330536, "step": 3430 }, { "epoch": 0.23, "learning_rate": 4.765628682594409e-06, "logits/chosen": -2.3740832805633545, "logits/rejected": -2.1946797370910645, "logps/chosen": -246.08438110351562, "logps/rejected": -229.9820098876953, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.023272987455129623, "rewards/margins": 0.07800062000751495, "rewards/rejected": -0.10127361863851547, "step": 3440 }, { "epoch": 0.23, "learning_rate": 4.763208973566041e-06, "logits/chosen": -2.185068130493164, "logits/rejected": -2.204409122467041, "logps/chosen": -187.07476806640625, "logps/rejected": -213.8040771484375, "loss": 0.6903, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.029450953006744385, "rewards/margins": 0.08014042675495148, "rewards/rejected": -0.10959136486053467, "step": 3450 }, { "epoch": 0.23, "learning_rate": 4.76077745853102e-06, "logits/chosen": -2.4352564811706543, "logits/rejected": -2.2802255153656006, "logps/chosen": -252.4907684326172, "logps/rejected": -262.39654541015625, "loss": 0.6912, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.024366283789277077, "rewards/margins": 0.08780606091022491, "rewards/rejected": -0.11217234283685684, "step": 3460 }, { "epoch": 0.23, "learning_rate": 4.758334150173322e-06, "logits/chosen": -2.3180832862854004, "logits/rejected": -2.12862491607666, "logps/chosen": -261.38922119140625, "logps/rejected": -242.67416381835938, "loss": 0.693, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.01694723591208458, "rewards/margins": 0.06338542699813843, "rewards/rejected": -0.04643818736076355, "step": 3470 }, { "epoch": 0.23, "learning_rate": 4.755879061238439e-06, "logits/chosen": -2.39463472366333, "logits/rejected": -2.1686863899230957, "logps/chosen": -254.03067016601562, "logps/rejected": -246.9172821044922, "loss": 0.6905, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.02142667956650257, "rewards/margins": 0.05523737147450447, "rewards/rejected": -0.03381068632006645, "step": 3480 }, { "epoch": 0.23, "learning_rate": 4.753412204533317e-06, "logits/chosen": -2.5263428688049316, "logits/rejected": -2.0499589443206787, "logps/chosen": -260.4190673828125, "logps/rejected": -223.6571502685547, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.024389993399381638, "rewards/margins": 0.0862947553396225, "rewards/rejected": -0.061904750764369965, "step": 3490 }, { "epoch": 0.23, "learning_rate": 4.750933592926292e-06, "logits/chosen": -2.398818254470825, "logits/rejected": -2.0742526054382324, "logps/chosen": -217.53012084960938, "logps/rejected": -198.85635375976562, "loss": 0.6901, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.011975173838436604, "rewards/margins": 0.08542615175247192, "rewards/rejected": -0.073450967669487, "step": 3500 }, { "epoch": 0.23, "eval_logits/chosen": -2.3047008514404297, "eval_logits/rejected": -2.119340419769287, "eval_logps/chosen": -230.0756072998047, "eval_logps/rejected": -217.86997985839844, "eval_loss": 0.6902625560760498, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": 0.019293660297989845, "eval_rewards/margins": 0.0818745344877243, "eval_rewards/rejected": -0.06258086860179901, "eval_runtime": 712.428, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 3500 }, { "epoch": 0.23, "learning_rate": 4.7484432393470124e-06, "logits/chosen": -2.486417531967163, "logits/rejected": -1.9664733409881592, "logps/chosen": -199.4810028076172, "logps/rejected": -157.95590209960938, "loss": 0.6836, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.026009265333414078, "rewards/margins": 0.13871736824512482, "rewards/rejected": -0.11270810663700104, "step": 3510 }, { "epoch": 0.23, "learning_rate": 4.745941156786385e-06, "logits/chosen": -2.092363119125366, "logits/rejected": -2.062434434890747, "logps/chosen": -150.09767150878906, "logps/rejected": -195.23306274414062, "loss": 0.6845, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.029414480552077293, "rewards/margins": 0.1429421305656433, "rewards/rejected": -0.11352765560150146, "step": 3520 }, { "epoch": 0.23, "learning_rate": 4.743427358296497e-06, "logits/chosen": -2.2270138263702393, "logits/rejected": -2.030658483505249, "logps/chosen": -187.15162658691406, "logps/rejected": -217.05062866210938, "loss": 0.6858, "rewards/accuracies": 0.75, "rewards/chosen": 0.025566350668668747, "rewards/margins": 0.18591654300689697, "rewards/rejected": -0.16035018861293793, "step": 3530 }, { "epoch": 0.23, "learning_rate": 4.740901856990553e-06, "logits/chosen": -2.149793863296509, "logits/rejected": -1.9605810642242432, "logps/chosen": -255.32052612304688, "logps/rejected": -219.46920776367188, "loss": 0.6922, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.013713860884308815, "rewards/margins": 0.08221259713172913, "rewards/rejected": -0.06849874556064606, "step": 3540 }, { "epoch": 0.23, "learning_rate": 4.738364666042804e-06, "logits/chosen": -2.3816933631896973, "logits/rejected": -1.9584366083145142, "logps/chosen": -286.97796630859375, "logps/rejected": -231.30648803710938, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0476701520383358, "rewards/margins": 0.07968376576900482, "rewards/rejected": -0.03201361373066902, "step": 3550 }, { "epoch": 0.23, "learning_rate": 4.735815798688483e-06, "logits/chosen": -2.3232216835021973, "logits/rejected": -2.1071863174438477, "logps/chosen": -194.87954711914062, "logps/rejected": -224.50369262695312, "loss": 0.6873, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03452432155609131, "rewards/margins": 0.09485017508268356, "rewards/rejected": -0.06032586842775345, "step": 3560 }, { "epoch": 0.23, "learning_rate": 4.7332552682237285e-06, "logits/chosen": -2.3406100273132324, "logits/rejected": -1.8915197849273682, "logps/chosen": -169.45468139648438, "logps/rejected": -163.56141662597656, "loss": 0.6888, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.043052736669778824, "rewards/margins": 0.09879221022129059, "rewards/rejected": -0.05573946237564087, "step": 3570 }, { "epoch": 0.23, "learning_rate": 4.7306830880055234e-06, "logits/chosen": -2.3042500019073486, "logits/rejected": -2.2339184284210205, "logps/chosen": -191.6345672607422, "logps/rejected": -207.58676147460938, "loss": 0.6895, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.00900744367390871, "rewards/margins": 0.07615131884813309, "rewards/rejected": -0.08515877276659012, "step": 3580 }, { "epoch": 0.23, "learning_rate": 4.728099271451619e-06, "logits/chosen": -2.341984510421753, "logits/rejected": -2.2418646812438965, "logps/chosen": -190.93679809570312, "logps/rejected": -190.25389099121094, "loss": 0.6903, "rewards/accuracies": 0.625, "rewards/chosen": 0.0007123596733435988, "rewards/margins": 0.06269621104001999, "rewards/rejected": -0.0619838610291481, "step": 3590 }, { "epoch": 0.24, "learning_rate": 4.725503832040466e-06, "logits/chosen": -2.1511435508728027, "logits/rejected": -2.1525609493255615, "logps/chosen": -148.30784606933594, "logps/rejected": -181.79171752929688, "loss": 0.6913, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.008500255644321442, "rewards/margins": 0.07396461069583893, "rewards/rejected": -0.06546434760093689, "step": 3600 }, { "epoch": 0.24, "eval_logits/chosen": -2.3040733337402344, "eval_logits/rejected": -2.11887264251709, "eval_logps/chosen": -230.52880859375, "eval_logps/rejected": -218.516357421875, "eval_loss": 0.6901616454124451, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": 0.014761154539883137, "eval_rewards/margins": 0.08380559831857681, "eval_rewards/rejected": -0.06904443353414536, "eval_runtime": 712.8273, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 3600 }, { "epoch": 0.24, "learning_rate": 4.722896783311152e-06, "logits/chosen": -2.282073497772217, "logits/rejected": -2.17645263671875, "logps/chosen": -259.38861083984375, "logps/rejected": -316.8056640625, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": -0.003396064043045044, "rewards/margins": 0.06283075362443924, "rewards/rejected": -0.06622681021690369, "step": 3610 }, { "epoch": 0.24, "learning_rate": 4.720278138863318e-06, "logits/chosen": -2.4280340671539307, "logits/rejected": -2.218613862991333, "logps/chosen": -190.4235382080078, "logps/rejected": -164.82579040527344, "loss": 0.6921, "rewards/accuracies": 0.625, "rewards/chosen": -0.003018149407580495, "rewards/margins": 0.0620940737426281, "rewards/rejected": -0.06511221826076508, "step": 3620 }, { "epoch": 0.24, "learning_rate": 4.717647912357095e-06, "logits/chosen": -2.361996650695801, "logits/rejected": -2.448129892349243, "logps/chosen": -275.29071044921875, "logps/rejected": -289.21759033203125, "loss": 0.6921, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.02813585475087166, "rewards/margins": 0.009944294579327106, "rewards/rejected": -0.03808014467358589, "step": 3630 }, { "epoch": 0.24, "learning_rate": 4.715006117513035e-06, "logits/chosen": -2.460373640060425, "logits/rejected": -2.220986843109131, "logps/chosen": -321.8302307128906, "logps/rejected": -273.5966796875, "loss": 0.6906, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.022954054176807404, "rewards/margins": 0.06946495175361633, "rewards/rejected": -0.04651089757680893, "step": 3640 }, { "epoch": 0.24, "learning_rate": 4.7123527681120326e-06, "logits/chosen": -2.275266647338867, "logits/rejected": -2.134054660797119, "logps/chosen": -247.04855346679688, "logps/rejected": -226.111328125, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0012259014183655381, "rewards/margins": 0.07214462757110596, "rewards/rejected": -0.0709187239408493, "step": 3650 }, { "epoch": 0.24, "learning_rate": 4.7096878779952594e-06, "logits/chosen": -2.357933759689331, "logits/rejected": -2.303584575653076, "logps/chosen": -275.9615783691406, "logps/rejected": -278.7004089355469, "loss": 0.6923, "rewards/accuracies": 0.625, "rewards/chosen": -0.005482043139636517, "rewards/margins": 0.05321590229868889, "rewards/rejected": -0.058697957545518875, "step": 3660 }, { "epoch": 0.24, "learning_rate": 4.707011461064086e-06, "logits/chosen": -2.159414768218994, "logits/rejected": -1.9229214191436768, "logps/chosen": -308.0876770019531, "logps/rejected": -274.2186584472656, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0077134473249316216, "rewards/margins": 0.10321645438671112, "rewards/rejected": -0.09550300985574722, "step": 3670 }, { "epoch": 0.24, "learning_rate": 4.704323531280016e-06, "logits/chosen": -2.2135162353515625, "logits/rejected": -2.040491819381714, "logps/chosen": -324.78515625, "logps/rejected": -248.89889526367188, "loss": 0.6895, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02426346018910408, "rewards/margins": 0.07309317588806152, "rewards/rejected": -0.048829711973667145, "step": 3680 }, { "epoch": 0.24, "learning_rate": 4.701624102664606e-06, "logits/chosen": -2.370241165161133, "logits/rejected": -2.0312302112579346, "logps/chosen": -262.2061767578125, "logps/rejected": -215.11416625976562, "loss": 0.6894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.010902756825089455, "rewards/margins": 0.07008221745491028, "rewards/rejected": -0.08098497986793518, "step": 3690 }, { "epoch": 0.24, "learning_rate": 4.698913189299399e-06, "logits/chosen": -2.2025485038757324, "logits/rejected": -2.3091206550598145, "logps/chosen": -187.55035400390625, "logps/rejected": -225.8077850341797, "loss": 0.694, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.013985480181872845, "rewards/margins": 0.05750720947980881, "rewards/rejected": -0.07149268686771393, "step": 3700 }, { "epoch": 0.24, "eval_logits/chosen": -2.281970262527466, "eval_logits/rejected": -2.0983099937438965, "eval_logps/chosen": -234.87881469726562, "eval_logps/rejected": -221.8666534423828, "eval_loss": 0.6903954744338989, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.028738651424646378, "eval_rewards/margins": 0.0738087072968483, "eval_rewards/rejected": -0.10254734754562378, "eval_runtime": 711.1172, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 3700 }, { "epoch": 0.24, "learning_rate": 4.696190805325847e-06, "logits/chosen": -2.2970728874206543, "logits/rejected": -2.1544101238250732, "logps/chosen": -207.85110473632812, "logps/rejected": -189.63479614257812, "loss": 0.69, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.014722605235874653, "rewards/margins": 0.08902369439601898, "rewards/rejected": -0.10374629497528076, "step": 3710 }, { "epoch": 0.24, "learning_rate": 4.693456964945239e-06, "logits/chosen": -2.416215419769287, "logits/rejected": -1.9415165185928345, "logps/chosen": -298.4938049316406, "logps/rejected": -207.4619598388672, "loss": 0.6882, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.003882091958075762, "rewards/margins": 0.10060401260852814, "rewards/rejected": -0.10448610782623291, "step": 3720 }, { "epoch": 0.24, "learning_rate": 4.6907116824186245e-06, "logits/chosen": -2.3689780235290527, "logits/rejected": -2.3212902545928955, "logps/chosen": -226.6787109375, "logps/rejected": -231.8772430419922, "loss": 0.689, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.006488241255283356, "rewards/margins": 0.05394769459962845, "rewards/rejected": -0.047459445893764496, "step": 3730 }, { "epoch": 0.24, "learning_rate": 4.687954972066742e-06, "logits/chosen": -2.260472297668457, "logits/rejected": -1.9865401983261108, "logps/chosen": -227.1370086669922, "logps/rejected": -220.00808715820312, "loss": 0.6858, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.026792461052536964, "rewards/margins": 0.1374204158782959, "rewards/rejected": -0.11062794923782349, "step": 3740 }, { "epoch": 0.25, "learning_rate": 4.685186848269944e-06, "logits/chosen": -2.2468438148498535, "logits/rejected": -2.11405873298645, "logps/chosen": -209.9337615966797, "logps/rejected": -178.09884643554688, "loss": 0.6915, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01938585564494133, "rewards/margins": 0.05544018745422363, "rewards/rejected": -0.0360543318092823, "step": 3750 }, { "epoch": 0.25, "learning_rate": 4.682407325468119e-06, "logits/chosen": -2.323763370513916, "logits/rejected": -1.967911720275879, "logps/chosen": -214.25634765625, "logps/rejected": -191.69644165039062, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03162650763988495, "rewards/margins": 0.11069830507040024, "rewards/rejected": -0.07907179743051529, "step": 3760 }, { "epoch": 0.25, "learning_rate": 4.67961641816062e-06, "logits/chosen": -2.301642417907715, "logits/rejected": -2.097708225250244, "logps/chosen": -271.3573303222656, "logps/rejected": -236.2880096435547, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": 0.044394414871931076, "rewards/margins": 0.06260766088962555, "rewards/rejected": -0.018213242292404175, "step": 3770 }, { "epoch": 0.25, "learning_rate": 4.676814140906188e-06, "logits/chosen": -2.180407762527466, "logits/rejected": -2.048719882965088, "logps/chosen": -237.28604125976562, "logps/rejected": -216.0989227294922, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.022501787170767784, "rewards/margins": 0.09418568760156631, "rewards/rejected": -0.07168390601873398, "step": 3780 }, { "epoch": 0.25, "learning_rate": 4.674000508322872e-06, "logits/chosen": -2.022406578063965, "logits/rejected": -2.098022222518921, "logps/chosen": -214.1189727783203, "logps/rejected": -229.61483764648438, "loss": 0.6918, "rewards/accuracies": 0.5, "rewards/chosen": 0.0341593436896801, "rewards/margins": 0.06312253326177597, "rewards/rejected": -0.02896319329738617, "step": 3790 }, { "epoch": 0.25, "learning_rate": 4.671175535087959e-06, "logits/chosen": -2.194871664047241, "logits/rejected": -2.153036117553711, "logps/chosen": -285.90673828125, "logps/rejected": -293.92242431640625, "loss": 0.6891, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.05420888587832451, "rewards/margins": 0.11247305572032928, "rewards/rejected": -0.05826416611671448, "step": 3800 }, { "epoch": 0.25, "eval_logits/chosen": -2.2757647037506104, "eval_logits/rejected": -2.092339277267456, "eval_logps/chosen": -227.5012969970703, "eval_logps/rejected": -213.98056030273438, "eval_loss": 0.6902437806129456, "eval_rewards/accuracies": 0.6320000290870667, "eval_rewards/chosen": 0.04503653571009636, "eval_rewards/margins": 0.06872312724590302, "eval_rewards/rejected": -0.023686589673161507, "eval_runtime": 711.0018, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.406, "step": 3800 }, { "epoch": 0.25, "learning_rate": 4.6683392359378924e-06, "logits/chosen": -2.1588714122772217, "logits/rejected": -1.9822829961776733, "logps/chosen": -231.51382446289062, "logps/rejected": -211.3966827392578, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": 0.03635237738490105, "rewards/margins": 0.05399390310049057, "rewards/rejected": -0.017641523852944374, "step": 3810 }, { "epoch": 0.25, "learning_rate": 4.665491625668198e-06, "logits/chosen": -2.074720859527588, "logits/rejected": -2.1197152137756348, "logps/chosen": -153.5623321533203, "logps/rejected": -185.47169494628906, "loss": 0.6873, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02684941329061985, "rewards/margins": 0.08354301750659943, "rewards/rejected": -0.05669360235333443, "step": 3820 }, { "epoch": 0.25, "learning_rate": 4.662632719133407e-06, "logits/chosen": -2.3514442443847656, "logits/rejected": -2.0640666484832764, "logps/chosen": -225.99966430664062, "logps/rejected": -167.13034057617188, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.047978900372982025, "rewards/margins": 0.08138148486614227, "rewards/rejected": -0.03340258076786995, "step": 3830 }, { "epoch": 0.25, "learning_rate": 4.659762531246974e-06, "logits/chosen": -2.2433078289031982, "logits/rejected": -2.0894062519073486, "logps/chosen": -214.85104370117188, "logps/rejected": -186.99905395507812, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0027232493739575148, "rewards/margins": 0.061046671122312546, "rewards/rejected": -0.06376992166042328, "step": 3840 }, { "epoch": 0.25, "learning_rate": 4.656881076981207e-06, "logits/chosen": -2.3131306171417236, "logits/rejected": -2.1745781898498535, "logps/chosen": -212.8335418701172, "logps/rejected": -200.85433959960938, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.003498140024021268, "rewards/margins": 0.059195131063461304, "rewards/rejected": -0.055697001516819, "step": 3850 }, { "epoch": 0.25, "learning_rate": 4.653988371367183e-06, "logits/chosen": -2.2689290046691895, "logits/rejected": -2.0051369667053223, "logps/chosen": -239.0817413330078, "logps/rejected": -183.85263061523438, "loss": 0.6928, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01722235046327114, "rewards/margins": 0.05978889390826225, "rewards/rejected": -0.04256654158234596, "step": 3860 }, { "epoch": 0.25, "learning_rate": 4.651084429494671e-06, "logits/chosen": -2.3513553142547607, "logits/rejected": -2.0689337253570557, "logps/chosen": -272.21990966796875, "logps/rejected": -197.9673614501953, "loss": 0.6918, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0313107892870903, "rewards/margins": 0.052712440490722656, "rewards/rejected": -0.021401654928922653, "step": 3870 }, { "epoch": 0.25, "learning_rate": 4.648169266512053e-06, "logits/chosen": -2.4198365211486816, "logits/rejected": -2.141869068145752, "logps/chosen": -219.36965942382812, "logps/rejected": -180.29354858398438, "loss": 0.69, "rewards/accuracies": 0.625, "rewards/chosen": 0.0595845952630043, "rewards/margins": 0.058780230581760406, "rewards/rejected": 0.0008043628185987473, "step": 3880 }, { "epoch": 0.25, "learning_rate": 4.6452428976262505e-06, "logits/chosen": -2.23230242729187, "logits/rejected": -2.0137524604797363, "logps/chosen": -199.0391387939453, "logps/rejected": -166.9921112060547, "loss": 0.6893, "rewards/accuracies": 0.75, "rewards/chosen": 0.04752303659915924, "rewards/margins": 0.1227576732635498, "rewards/rejected": -0.07523464411497116, "step": 3890 }, { "epoch": 0.26, "learning_rate": 4.642305338102633e-06, "logits/chosen": -2.2830934524536133, "logits/rejected": -2.3560256958007812, "logps/chosen": -158.4663543701172, "logps/rejected": -184.8391876220703, "loss": 0.6877, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.032832108438014984, "rewards/margins": 0.08227143436670303, "rewards/rejected": -0.04943932965397835, "step": 3900 }, { "epoch": 0.26, "eval_logits/chosen": -2.2935938835144043, "eval_logits/rejected": -2.108933210372925, "eval_logps/chosen": -229.80093383789062, "eval_logps/rejected": -217.31515502929688, "eval_loss": 0.6902133822441101, "eval_rewards/accuracies": 0.6244999766349792, "eval_rewards/chosen": 0.02204015851020813, "eval_rewards/margins": 0.07907257974147797, "eval_rewards/rejected": -0.05703242868185043, "eval_runtime": 711.7595, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 3900 }, { "epoch": 0.26, "learning_rate": 4.639356603264953e-06, "logits/chosen": -2.338958263397217, "logits/rejected": -2.1034774780273438, "logps/chosen": -240.74267578125, "logps/rejected": -219.4384307861328, "loss": 0.6924, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.010891283862292767, "rewards/margins": 0.04426593333482742, "rewards/rejected": -0.03337464481592178, "step": 3910 }, { "epoch": 0.26, "learning_rate": 4.636396708495255e-06, "logits/chosen": -2.1757044792175293, "logits/rejected": -2.1606650352478027, "logps/chosen": -225.34707641601562, "logps/rejected": -206.83816528320312, "loss": 0.6914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.03848005831241608, "rewards/margins": 0.06334998458623886, "rewards/rejected": -0.024869924411177635, "step": 3920 }, { "epoch": 0.26, "learning_rate": 4.633425669233799e-06, "logits/chosen": -2.274425983428955, "logits/rejected": -2.2967169284820557, "logps/chosen": -230.49679565429688, "logps/rejected": -234.1136474609375, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04329800605773926, "rewards/margins": 0.08064167201519012, "rewards/rejected": -0.03734365105628967, "step": 3930 }, { "epoch": 0.26, "learning_rate": 4.6304435009789825e-06, "logits/chosen": -2.2880218029022217, "logits/rejected": -2.0706839561462402, "logps/chosen": -233.8865509033203, "logps/rejected": -172.7992401123047, "loss": 0.6893, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.03620678931474686, "rewards/margins": 0.09822587668895721, "rewards/rejected": -0.062019091099500656, "step": 3940 }, { "epoch": 0.26, "learning_rate": 4.627450219287256e-06, "logits/chosen": -2.3368616104125977, "logits/rejected": -2.1908602714538574, "logps/chosen": -177.8789825439453, "logps/rejected": -161.35159301757812, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03680109232664108, "rewards/margins": 0.05983690172433853, "rewards/rejected": -0.02303580567240715, "step": 3950 }, { "epoch": 0.26, "learning_rate": 4.624445839773042e-06, "logits/chosen": -2.2832131385803223, "logits/rejected": -2.2111704349517822, "logps/chosen": -169.68849182128906, "logps/rejected": -171.02833557128906, "loss": 0.6922, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.003228194313123822, "rewards/margins": 0.024037057533860207, "rewards/rejected": -0.020808864384889603, "step": 3960 }, { "epoch": 0.26, "learning_rate": 4.621430378108656e-06, "logits/chosen": -2.264580249786377, "logits/rejected": -2.1098790168762207, "logps/chosen": -257.55718994140625, "logps/rejected": -260.2860412597656, "loss": 0.689, "rewards/accuracies": 0.75, "rewards/chosen": 0.002011381322517991, "rewards/margins": 0.09493207186460495, "rewards/rejected": -0.092920683324337, "step": 3970 }, { "epoch": 0.26, "learning_rate": 4.618403850024223e-06, "logits/chosen": -2.1793527603149414, "logits/rejected": -1.9493013620376587, "logps/chosen": -256.003173828125, "logps/rejected": -215.7708282470703, "loss": 0.691, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.005130067467689514, "rewards/margins": 0.06429970264434814, "rewards/rejected": -0.06942977011203766, "step": 3980 }, { "epoch": 0.26, "learning_rate": 4.615366271307598e-06, "logits/chosen": -2.3207273483276367, "logits/rejected": -2.164661407470703, "logps/chosen": -196.49105834960938, "logps/rejected": -191.2655029296875, "loss": 0.6906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.045480918139219284, "rewards/margins": 0.06666766852140427, "rewards/rejected": -0.11214858293533325, "step": 3990 }, { "epoch": 0.26, "learning_rate": 4.612317657804277e-06, "logits/chosen": -2.1907215118408203, "logits/rejected": -2.2445192337036133, "logps/chosen": -149.4791259765625, "logps/rejected": -210.52490234375, "loss": 0.6884, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.02246091142296791, "rewards/margins": 0.09697895497083664, "rewards/rejected": -0.11943986266851425, "step": 4000 }, { "epoch": 0.26, "eval_logits/chosen": -2.2913191318511963, "eval_logits/rejected": -2.106405258178711, "eval_logps/chosen": -232.1314697265625, "eval_logps/rejected": -219.69049072265625, "eval_loss": 0.6901000738143921, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": -0.0012654466554522514, "eval_rewards/margins": 0.07952029258012772, "eval_rewards/rejected": -0.08078574389219284, "eval_runtime": 712.6087, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 4000 }, { "epoch": 0.26, "learning_rate": 4.6092580254173236e-06, "logits/chosen": -2.1913225650787354, "logits/rejected": -1.959183931350708, "logps/chosen": -258.2712097167969, "logps/rejected": -247.7600555419922, "loss": 0.6902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.00948442704975605, "rewards/margins": 0.08156983554363251, "rewards/rejected": -0.09105426073074341, "step": 4010 }, { "epoch": 0.26, "learning_rate": 4.606187390107277e-06, "logits/chosen": -2.1633963584899902, "logits/rejected": -1.9803078174591064, "logps/chosen": -230.2688751220703, "logps/rejected": -197.57717895507812, "loss": 0.6914, "rewards/accuracies": 0.5, "rewards/chosen": -0.053934670984745026, "rewards/margins": 0.06301041692495346, "rewards/rejected": -0.11694508790969849, "step": 4020 }, { "epoch": 0.26, "learning_rate": 4.603105767892077e-06, "logits/chosen": -2.264932632446289, "logits/rejected": -2.1878082752227783, "logps/chosen": -195.32559204101562, "logps/rejected": -221.0668182373047, "loss": 0.6907, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0063558658584952354, "rewards/margins": 0.07787985354661942, "rewards/rejected": -0.08423570543527603, "step": 4030 }, { "epoch": 0.26, "learning_rate": 4.6000131748469725e-06, "logits/chosen": -2.3445935249328613, "logits/rejected": -1.9680637121200562, "logps/chosen": -250.29660034179688, "logps/rejected": -182.46685791015625, "loss": 0.6903, "rewards/accuracies": 0.5, "rewards/chosen": -0.0015633717412129045, "rewards/margins": 0.0648859366774559, "rewards/rejected": -0.06644931435585022, "step": 4040 }, { "epoch": 0.26, "learning_rate": 4.596909627104445e-06, "logits/chosen": -2.3850934505462646, "logits/rejected": -2.3055262565612793, "logps/chosen": -251.54226684570312, "logps/rejected": -226.3249053955078, "loss": 0.6889, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.028039926663041115, "rewards/margins": 0.07416818290948868, "rewards/rejected": -0.10220811516046524, "step": 4050 }, { "epoch": 0.27, "learning_rate": 4.5937951408541215e-06, "logits/chosen": -2.433464527130127, "logits/rejected": -1.9047530889511108, "logps/chosen": -254.9823760986328, "logps/rejected": -220.0203857421875, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.023438826203346252, "rewards/margins": 0.10415074974298477, "rewards/rejected": -0.12758956849575043, "step": 4060 }, { "epoch": 0.27, "learning_rate": 4.590669732342685e-06, "logits/chosen": -2.1566336154937744, "logits/rejected": -2.012592315673828, "logps/chosen": -213.21224975585938, "logps/rejected": -226.32504272460938, "loss": 0.6912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.010646522045135498, "rewards/margins": 0.11220131814479828, "rewards/rejected": -0.12284784018993378, "step": 4070 }, { "epoch": 0.27, "learning_rate": 4.587533417873799e-06, "logits/chosen": -2.2341346740722656, "logits/rejected": -2.3223414421081543, "logps/chosen": -195.48965454101562, "logps/rejected": -263.8111267089844, "loss": 0.6928, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.023255977779626846, "rewards/margins": 0.08617839962244034, "rewards/rejected": -0.10943436622619629, "step": 4080 }, { "epoch": 0.27, "learning_rate": 4.584386213808016e-06, "logits/chosen": -2.2321903705596924, "logits/rejected": -1.8963381052017212, "logps/chosen": -222.75430297851562, "logps/rejected": -183.17471313476562, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.01075290609151125, "rewards/margins": 0.061965636909008026, "rewards/rejected": -0.072718545794487, "step": 4090 }, { "epoch": 0.27, "learning_rate": 4.581228136562693e-06, "logits/chosen": -2.122157096862793, "logits/rejected": -2.2352585792541504, "logps/chosen": -239.14389038085938, "logps/rejected": -216.71829223632812, "loss": 0.693, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.009634166955947876, "rewards/margins": 0.03259027749300003, "rewards/rejected": -0.042224448174238205, "step": 4100 }, { "epoch": 0.27, "eval_logits/chosen": -2.3034896850585938, "eval_logits/rejected": -2.117652654647827, "eval_logps/chosen": -233.33334350585938, "eval_logps/rejected": -219.198486328125, "eval_loss": 0.6903753876686096, "eval_rewards/accuracies": 0.628000020980835, "eval_rewards/chosen": -0.013284044340252876, "eval_rewards/margins": 0.06258184462785721, "eval_rewards/rejected": -0.07586588710546494, "eval_runtime": 713.5236, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 4100 }, { "epoch": 0.27, "learning_rate": 4.578059202611909e-06, "logits/chosen": -2.3259823322296143, "logits/rejected": -2.100602149963379, "logps/chosen": -256.8636779785156, "logps/rejected": -246.4088897705078, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": 0.002952608745545149, "rewards/margins": 0.0430120974779129, "rewards/rejected": -0.04005948826670647, "step": 4110 }, { "epoch": 0.27, "learning_rate": 4.574879428486376e-06, "logits/chosen": -2.3123717308044434, "logits/rejected": -2.031857967376709, "logps/chosen": -214.11416625976562, "logps/rejected": -217.881591796875, "loss": 0.6911, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.011976310983300209, "rewards/margins": 0.06689594686031342, "rewards/rejected": -0.07887225598096848, "step": 4120 }, { "epoch": 0.27, "learning_rate": 4.571688830773352e-06, "logits/chosen": -2.3346524238586426, "logits/rejected": -2.226778507232666, "logps/chosen": -223.7250213623047, "logps/rejected": -206.4811553955078, "loss": 0.6918, "rewards/accuracies": 0.5, "rewards/chosen": -0.016568060964345932, "rewards/margins": 0.02635127305984497, "rewards/rejected": -0.0429193340241909, "step": 4130 }, { "epoch": 0.27, "learning_rate": 4.568487426116559e-06, "logits/chosen": -2.2562003135681152, "logits/rejected": -2.2672030925750732, "logps/chosen": -172.1905059814453, "logps/rejected": -168.9148712158203, "loss": 0.6929, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0021867998875677586, "rewards/margins": 0.04257757216691971, "rewards/rejected": -0.04476437345147133, "step": 4140 }, { "epoch": 0.27, "learning_rate": 4.565275231216092e-06, "logits/chosen": -2.171159267425537, "logits/rejected": -2.1469078063964844, "logps/chosen": -151.336669921875, "logps/rejected": -201.35855102539062, "loss": 0.6907, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 6.799399852752686e-05, "rewards/margins": 0.04031980410218239, "rewards/rejected": -0.04025180637836456, "step": 4150 }, { "epoch": 0.27, "learning_rate": 4.562052262828331e-06, "logits/chosen": -2.2262353897094727, "logits/rejected": -2.083603620529175, "logps/chosen": -201.1298065185547, "logps/rejected": -201.3617706298828, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": -0.02318784035742283, "rewards/margins": 0.055079467594623566, "rewards/rejected": -0.07826730608940125, "step": 4160 }, { "epoch": 0.27, "learning_rate": 4.558818537765861e-06, "logits/chosen": -2.4017839431762695, "logits/rejected": -2.179560899734497, "logps/chosen": -237.0455322265625, "logps/rejected": -208.0402374267578, "loss": 0.6922, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.010311352089047432, "rewards/margins": 0.05862750858068466, "rewards/rejected": -0.06893886625766754, "step": 4170 }, { "epoch": 0.27, "learning_rate": 4.555574072897374e-06, "logits/chosen": -2.3054423332214355, "logits/rejected": -2.3093464374542236, "logps/chosen": -202.890625, "logps/rejected": -206.8453826904297, "loss": 0.689, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.009293651208281517, "rewards/margins": 0.062263913452625275, "rewards/rejected": -0.07155755162239075, "step": 4180 }, { "epoch": 0.27, "learning_rate": 4.552318885147589e-06, "logits/chosen": -2.427234649658203, "logits/rejected": -2.067701816558838, "logps/chosen": -240.99063110351562, "logps/rejected": -188.87326049804688, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.000729889259673655, "rewards/margins": 0.07272221148014069, "rewards/rejected": -0.07345209270715714, "step": 4190 }, { "epoch": 0.27, "learning_rate": 4.549052991497159e-06, "logits/chosen": -2.283116102218628, "logits/rejected": -2.254042387008667, "logps/chosen": -181.29025268554688, "logps/rejected": -188.34085083007812, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.011230905540287495, "rewards/margins": 0.06631726771593094, "rewards/rejected": -0.07754816114902496, "step": 4200 }, { "epoch": 0.27, "eval_logits/chosen": -2.296299934387207, "eval_logits/rejected": -2.1112263202667236, "eval_logps/chosen": -232.25408935546875, "eval_logps/rejected": -218.76133728027344, "eval_loss": 0.6903825402259827, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": -0.002491473685950041, "eval_rewards/margins": 0.0690029114484787, "eval_rewards/rejected": -0.07149438560009003, "eval_runtime": 711.9807, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 4200 }, { "epoch": 0.28, "learning_rate": 4.545776408982585e-06, "logits/chosen": -2.222346782684326, "logits/rejected": -2.215831756591797, "logps/chosen": -230.8057403564453, "logps/rejected": -228.67105102539062, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.009383128955960274, "rewards/margins": 0.0687854140996933, "rewards/rejected": -0.059402287006378174, "step": 4210 }, { "epoch": 0.28, "learning_rate": 4.542489154696128e-06, "logits/chosen": -2.435891628265381, "logits/rejected": -2.0726494789123535, "logps/chosen": -265.7242736816406, "logps/rejected": -210.0156707763672, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": 0.019960414618253708, "rewards/margins": 0.05542059987783432, "rewards/rejected": -0.035460181534290314, "step": 4220 }, { "epoch": 0.28, "learning_rate": 4.5391912457857145e-06, "logits/chosen": -2.3141016960144043, "logits/rejected": -2.055931568145752, "logps/chosen": -264.88525390625, "logps/rejected": -223.41726684570312, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0008437506621703506, "rewards/margins": 0.05641711503267288, "rewards/rejected": -0.05557336285710335, "step": 4230 }, { "epoch": 0.28, "learning_rate": 4.535882699454854e-06, "logits/chosen": -2.3128061294555664, "logits/rejected": -2.189279556274414, "logps/chosen": -270.4936218261719, "logps/rejected": -303.23992919921875, "loss": 0.6888, "rewards/accuracies": 0.625, "rewards/chosen": 0.005573967471718788, "rewards/margins": 0.11083599179983139, "rewards/rejected": -0.10526201874017715, "step": 4240 }, { "epoch": 0.28, "learning_rate": 4.532563532962546e-06, "logits/chosen": -2.368762493133545, "logits/rejected": -2.450859546661377, "logps/chosen": -191.6454315185547, "logps/rejected": -218.98867797851562, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": -0.017511827871203423, "rewards/margins": 0.0574830062687397, "rewards/rejected": -0.07499483227729797, "step": 4250 }, { "epoch": 0.28, "learning_rate": 4.529233763623187e-06, "logits/chosen": -2.328399896621704, "logits/rejected": -2.034263849258423, "logps/chosen": -203.28858947753906, "logps/rejected": -163.58592224121094, "loss": 0.6884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.011880872771143913, "rewards/margins": 0.08256997168064117, "rewards/rejected": -0.09445084631443024, "step": 4260 }, { "epoch": 0.28, "learning_rate": 4.5258934088064854e-06, "logits/chosen": -2.2317817211151123, "logits/rejected": -1.83087158203125, "logps/chosen": -223.72305297851562, "logps/rejected": -181.90640258789062, "loss": 0.6868, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.037422824651002884, "rewards/margins": 0.12880873680114746, "rewards/rejected": -0.16623155772686005, "step": 4270 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -2.3460514545440674, "logits/rejected": -2.08577299118042, "logps/chosen": -293.10992431640625, "logps/rejected": -206.9127655029297, "loss": 0.6889, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.024574220180511475, "rewards/margins": 0.1044369488954544, "rewards/rejected": -0.12901116907596588, "step": 4280 }, { "epoch": 0.28, "learning_rate": 4.519181012495892e-06, "logits/chosen": -2.3494815826416016, "logits/rejected": -2.219589948654175, "logps/chosen": -247.26657104492188, "logps/rejected": -227.00888061523438, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.05047302693128586, "rewards/margins": 0.08816438913345337, "rewards/rejected": -0.13863742351531982, "step": 4290 }, { "epoch": 0.28, "learning_rate": 4.515809006017147e-06, "logits/chosen": -2.274042844772339, "logits/rejected": -1.9699468612670898, "logps/chosen": -236.0398712158203, "logps/rejected": -208.1054229736328, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.026302605867385864, "rewards/margins": 0.07574830204248428, "rewards/rejected": -0.10205090045928955, "step": 4300 }, { "epoch": 0.28, "eval_logits/chosen": -2.286620855331421, "eval_logits/rejected": -2.1015021800994873, "eval_logps/chosen": -235.3810272216797, "eval_logps/rejected": -223.5635223388672, "eval_loss": 0.6901422142982483, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.03376083821058273, "eval_rewards/margins": 0.08575531840324402, "eval_rewards/rejected": -0.11951615661382675, "eval_runtime": 711.2011, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 4300 }, { "epoch": 0.28, "learning_rate": 4.512426484091171e-06, "logits/chosen": -2.418959379196167, "logits/rejected": -2.085226058959961, "logps/chosen": -279.13177490234375, "logps/rejected": -249.38064575195312, "loss": 0.6927, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.015505967661738396, "rewards/margins": 0.06541910022497177, "rewards/rejected": -0.08092506229877472, "step": 4310 }, { "epoch": 0.28, "learning_rate": 4.509033464362858e-06, "logits/chosen": -2.122525215148926, "logits/rejected": -2.1860134601593018, "logps/chosen": -243.3289794921875, "logps/rejected": -266.288330078125, "loss": 0.6901, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.020630965009331703, "rewards/margins": 0.08728428930044174, "rewards/rejected": -0.10791525989770889, "step": 4320 }, { "epoch": 0.28, "learning_rate": 4.505629964531857e-06, "logits/chosen": -2.3952324390411377, "logits/rejected": -2.192960500717163, "logps/chosen": -226.1346435546875, "logps/rejected": -203.77609252929688, "loss": 0.6888, "rewards/accuracies": 0.625, "rewards/chosen": -0.029302721843123436, "rewards/margins": 0.08454638719558716, "rewards/rejected": -0.11384911835193634, "step": 4330 }, { "epoch": 0.28, "learning_rate": 4.502216002352492e-06, "logits/chosen": -2.3942387104034424, "logits/rejected": -2.1669986248016357, "logps/chosen": -167.1390380859375, "logps/rejected": -154.9490203857422, "loss": 0.6914, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04098113626241684, "rewards/margins": 0.07224190980195999, "rewards/rejected": -0.11322303861379623, "step": 4340 }, { "epoch": 0.28, "learning_rate": 4.498791595633663e-06, "logits/chosen": -2.227745771408081, "logits/rejected": -1.8462340831756592, "logps/chosen": -265.2237854003906, "logps/rejected": -183.4816131591797, "loss": 0.6912, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.02839501202106476, "rewards/margins": 0.054582733660936356, "rewards/rejected": -0.08297775685787201, "step": 4350 }, { "epoch": 0.29, "learning_rate": 4.495356762238751e-06, "logits/chosen": -2.472080707550049, "logits/rejected": -1.9949004650115967, "logps/chosen": -284.7836608886719, "logps/rejected": -195.12869262695312, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": -0.015857676044106483, "rewards/margins": 0.07797005027532578, "rewards/rejected": -0.09382772445678711, "step": 4360 }, { "epoch": 0.29, "learning_rate": 4.491911520085532e-06, "logits/chosen": -2.046393394470215, "logits/rejected": -1.9404007196426392, "logps/chosen": -202.2923126220703, "logps/rejected": -221.1287078857422, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.018845614045858383, "rewards/margins": 0.08063776046037674, "rewards/rejected": -0.09948337823152542, "step": 4370 }, { "epoch": 0.29, "learning_rate": 4.488455887146075e-06, "logits/chosen": -2.159259080886841, "logits/rejected": -2.141447067260742, "logps/chosen": -172.12095642089844, "logps/rejected": -198.18894958496094, "loss": 0.6879, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.020088694989681244, "rewards/margins": 0.12904280424118042, "rewards/rejected": -0.14913150668144226, "step": 4380 }, { "epoch": 0.29, "learning_rate": 4.484989881446654e-06, "logits/chosen": -2.4215025901794434, "logits/rejected": -2.220041275024414, "logps/chosen": -204.8853302001953, "logps/rejected": -191.7876434326172, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": -0.024093201383948326, "rewards/margins": 0.0484703965485096, "rewards/rejected": -0.07256358861923218, "step": 4390 }, { "epoch": 0.29, "learning_rate": 4.481513521067654e-06, "logits/chosen": -2.3942711353302, "logits/rejected": -2.039447546005249, "logps/chosen": -228.5469512939453, "logps/rejected": -200.28292846679688, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03772415220737457, "rewards/margins": 0.07151724398136139, "rewards/rejected": -0.10924138873815536, "step": 4400 }, { "epoch": 0.29, "eval_logits/chosen": -2.292935609817505, "eval_logits/rejected": -2.107675790786743, "eval_logps/chosen": -236.54518127441406, "eval_logps/rejected": -223.5493927001953, "eval_loss": 0.6902163028717041, "eval_rewards/accuracies": 0.6274999976158142, "eval_rewards/chosen": -0.04540235176682472, "eval_rewards/margins": 0.07397259771823883, "eval_rewards/rejected": -0.11937494575977325, "eval_runtime": 713.6438, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 4400 }, { "epoch": 0.29, "learning_rate": 4.478026824143473e-06, "logits/chosen": -2.3092598915100098, "logits/rejected": -2.173832416534424, "logps/chosen": -270.6745910644531, "logps/rejected": -224.7923583984375, "loss": 0.6862, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.046129751950502396, "rewards/margins": 0.10685235261917114, "rewards/rejected": -0.15298210084438324, "step": 4410 }, { "epoch": 0.29, "learning_rate": 4.474529808862429e-06, "logits/chosen": -2.197213649749756, "logits/rejected": -2.124817371368408, "logps/chosen": -193.43679809570312, "logps/rejected": -218.94619750976562, "loss": 0.6909, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04616478085517883, "rewards/margins": 0.07885146141052246, "rewards/rejected": -0.1250162422657013, "step": 4420 }, { "epoch": 0.29, "learning_rate": 4.471022493466669e-06, "logits/chosen": -2.3107991218566895, "logits/rejected": -1.9796260595321655, "logps/chosen": -303.61737060546875, "logps/rejected": -232.7263641357422, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.016782710328698158, "rewards/margins": 0.06120295077562332, "rewards/rejected": -0.07798566669225693, "step": 4430 }, { "epoch": 0.29, "learning_rate": 4.467504896252066e-06, "logits/chosen": -2.335106372833252, "logits/rejected": -2.22440767288208, "logps/chosen": -252.58322143554688, "logps/rejected": -233.07406616210938, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": -0.028819028288125992, "rewards/margins": 0.09233070909976959, "rewards/rejected": -0.12114973366260529, "step": 4440 }, { "epoch": 0.29, "learning_rate": 4.463977035568132e-06, "logits/chosen": -2.1951041221618652, "logits/rejected": -2.431762933731079, "logps/chosen": -214.28427124023438, "logps/rejected": -269.91864013671875, "loss": 0.691, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.019015159457921982, "rewards/margins": 0.039287667721509933, "rewards/rejected": -0.058302827179431915, "step": 4450 }, { "epoch": 0.29, "learning_rate": 4.460438929817914e-06, "logits/chosen": -2.264540672302246, "logits/rejected": -2.11928129196167, "logps/chosen": -207.63388061523438, "logps/rejected": -209.83316040039062, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.012141216546297073, "rewards/margins": 0.06487870216369629, "rewards/rejected": -0.07701991498470306, "step": 4460 }, { "epoch": 0.29, "learning_rate": 4.456890597457907e-06, "logits/chosen": -2.112905979156494, "logits/rejected": -2.159135341644287, "logps/chosen": -216.1064453125, "logps/rejected": -243.3882293701172, "loss": 0.6893, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03970780223608017, "rewards/margins": 0.09077353030443192, "rewards/rejected": -0.13048133254051208, "step": 4470 }, { "epoch": 0.29, "learning_rate": 4.453332056997951e-06, "logits/chosen": -2.2369141578674316, "logits/rejected": -2.2910995483398438, "logps/chosen": -181.7244110107422, "logps/rejected": -187.7960662841797, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.01764925941824913, "rewards/margins": 0.10396716743707657, "rewards/rejected": -0.1216164231300354, "step": 4480 }, { "epoch": 0.29, "learning_rate": 4.449763327001134e-06, "logits/chosen": -2.2684309482574463, "logits/rejected": -2.174893617630005, "logps/chosen": -190.84078979492188, "logps/rejected": -226.34326171875, "loss": 0.6906, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.020785531029105186, "rewards/margins": 0.07087056338787079, "rewards/rejected": -0.09165609627962112, "step": 4490 }, { "epoch": 0.29, "learning_rate": 4.446184426083702e-06, "logits/chosen": -2.249093532562256, "logits/rejected": -2.0277256965637207, "logps/chosen": -195.96009826660156, "logps/rejected": -216.97604370117188, "loss": 0.6864, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.023957300931215286, "rewards/margins": 0.12648364901542664, "rewards/rejected": -0.15044096112251282, "step": 4500 }, { "epoch": 0.29, "eval_logits/chosen": -2.307412624359131, "eval_logits/rejected": -2.1211140155792236, "eval_logps/chosen": -234.31179809570312, "eval_logps/rejected": -222.2449493408203, "eval_loss": 0.6901082396507263, "eval_rewards/accuracies": 0.6324999928474426, "eval_rewards/chosen": -0.02306850627064705, "eval_rewards/margins": 0.0832618772983551, "eval_rewards/rejected": -0.10633040219545364, "eval_runtime": 714.5639, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 4500 }, { "epoch": 0.3, "learning_rate": 4.442595372914954e-06, "logits/chosen": -2.3577396869659424, "logits/rejected": -2.0909037590026855, "logps/chosen": -236.86007690429688, "logps/rejected": -160.77267456054688, "loss": 0.6884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0012024863390251994, "rewards/margins": 0.09186828881502151, "rewards/rejected": -0.090665802359581, "step": 4510 }, { "epoch": 0.3, "learning_rate": 4.43899618621715e-06, "logits/chosen": -2.303638458251953, "logits/rejected": -2.097632884979248, "logps/chosen": -254.14059448242188, "logps/rejected": -269.0081481933594, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.030867312103509903, "rewards/margins": 0.1167084202170372, "rewards/rejected": -0.1475757360458374, "step": 4520 }, { "epoch": 0.3, "learning_rate": 4.4353868847654105e-06, "logits/chosen": -2.4185733795166016, "logits/rejected": -2.1811881065368652, "logps/chosen": -244.586181640625, "logps/rejected": -222.5398406982422, "loss": 0.6862, "rewards/accuracies": 0.625, "rewards/chosen": 0.012720689177513123, "rewards/margins": 0.08021236956119537, "rewards/rejected": -0.06749166548252106, "step": 4530 }, { "epoch": 0.3, "learning_rate": 4.43176748738762e-06, "logits/chosen": -2.3359453678131104, "logits/rejected": -2.09609055519104, "logps/chosen": -233.65359497070312, "logps/rejected": -248.21719360351562, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.01705637015402317, "rewards/margins": 0.10175220668315887, "rewards/rejected": -0.1188085675239563, "step": 4540 }, { "epoch": 0.3, "learning_rate": 4.4281380129643295e-06, "logits/chosen": -2.2307958602905273, "logits/rejected": -2.0683705806732178, "logps/chosen": -229.425537109375, "logps/rejected": -228.88040161132812, "loss": 0.6899, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0032461367081850767, "rewards/margins": 0.0992891788482666, "rewards/rejected": -0.09604303538799286, "step": 4550 }, { "epoch": 0.3, "learning_rate": 4.424498480428654e-06, "logits/chosen": -2.258957862854004, "logits/rejected": -2.15374755859375, "logps/chosen": -249.0747528076172, "logps/rejected": -211.0282745361328, "loss": 0.6932, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.018028225749731064, "rewards/margins": 0.01978963240981102, "rewards/rejected": -0.037817858159542084, "step": 4560 }, { "epoch": 0.3, "learning_rate": 4.420848908766178e-06, "logits/chosen": -2.3698325157165527, "logits/rejected": -2.2980637550354004, "logps/chosen": -206.7262420654297, "logps/rejected": -220.7576141357422, "loss": 0.69, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.005006049759685993, "rewards/margins": 0.05151135474443436, "rewards/rejected": -0.04650530219078064, "step": 4570 }, { "epoch": 0.3, "learning_rate": 4.417189317014855e-06, "logits/chosen": -2.2065937519073486, "logits/rejected": -2.445247173309326, "logps/chosen": -199.8828582763672, "logps/rejected": -236.3675994873047, "loss": 0.6903, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.009130073711276054, "rewards/margins": 0.057878412306308746, "rewards/rejected": -0.04874832555651665, "step": 4580 }, { "epoch": 0.3, "learning_rate": 4.41351972426491e-06, "logits/chosen": -2.129570484161377, "logits/rejected": -2.161388874053955, "logps/chosen": -248.59689331054688, "logps/rejected": -309.24725341796875, "loss": 0.6913, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.018575403839349747, "rewards/margins": 0.06597335636615753, "rewards/rejected": -0.08454876393079758, "step": 4590 }, { "epoch": 0.3, "learning_rate": 4.409840149658735e-06, "logits/chosen": -2.2294223308563232, "logits/rejected": -1.970663070678711, "logps/chosen": -284.4920654296875, "logps/rejected": -242.57687377929688, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": 0.0002518877445254475, "rewards/margins": 0.08236613124608994, "rewards/rejected": -0.08211424201726913, "step": 4600 }, { "epoch": 0.3, "eval_logits/chosen": -2.3078274726867676, "eval_logits/rejected": -2.121540069580078, "eval_logps/chosen": -231.3809051513672, "eval_logps/rejected": -218.01165771484375, "eval_loss": 0.6902089715003967, "eval_rewards/accuracies": 0.6309999823570251, "eval_rewards/chosen": 0.0062404475174844265, "eval_rewards/margins": 0.07023809105157852, "eval_rewards/rejected": -0.06399764865636826, "eval_runtime": 711.6306, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 4600 }, { "epoch": 0.3, "learning_rate": 4.4061506123907925e-06, "logits/chosen": -2.226529598236084, "logits/rejected": -2.063323497772217, "logps/chosen": -263.83251953125, "logps/rejected": -228.8885040283203, "loss": 0.6906, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0025929573457688093, "rewards/margins": 0.05362165719270706, "rewards/rejected": -0.051028698682785034, "step": 4610 }, { "epoch": 0.3, "learning_rate": 4.402451131707519e-06, "logits/chosen": -2.4300453662872314, "logits/rejected": -1.9670915603637695, "logps/chosen": -208.1881866455078, "logps/rejected": -143.37477111816406, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.012772110290825367, "rewards/margins": 0.1066797599196434, "rewards/rejected": -0.09390763938426971, "step": 4620 }, { "epoch": 0.3, "learning_rate": 4.398741726907215e-06, "logits/chosen": -2.4595742225646973, "logits/rejected": -2.141775608062744, "logps/chosen": -277.5190734863281, "logps/rejected": -244.45352172851562, "loss": 0.6886, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.017383281141519547, "rewards/margins": 0.07261139899492264, "rewards/rejected": -0.05522811412811279, "step": 4630 }, { "epoch": 0.3, "learning_rate": 4.395022417339955e-06, "logits/chosen": -2.2033920288085938, "logits/rejected": -2.2470412254333496, "logps/chosen": -208.9228973388672, "logps/rejected": -223.9864044189453, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.022978752851486206, "rewards/margins": 0.0657435953617096, "rewards/rejected": -0.0887223556637764, "step": 4640 }, { "epoch": 0.3, "learning_rate": 4.391293222407479e-06, "logits/chosen": -2.30222487449646, "logits/rejected": -2.303806781768799, "logps/chosen": -136.2609405517578, "logps/rejected": -160.14111328125, "loss": 0.6907, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0029621024150401354, "rewards/margins": 0.05650148540735245, "rewards/rejected": -0.05353938415646553, "step": 4650 }, { "epoch": 0.3, "learning_rate": 4.387554161563094e-06, "logits/chosen": -2.3135313987731934, "logits/rejected": -2.2245595455169678, "logps/chosen": -200.64547729492188, "logps/rejected": -199.0697479248047, "loss": 0.6864, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.017812874168157578, "rewards/margins": 0.10243946313858032, "rewards/rejected": -0.1202523335814476, "step": 4660 }, { "epoch": 0.31, "learning_rate": 4.383805254311575e-06, "logits/chosen": -2.509479522705078, "logits/rejected": -2.135781764984131, "logps/chosen": -257.2196044921875, "logps/rejected": -218.57418823242188, "loss": 0.6896, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.010785650461912155, "rewards/margins": 0.06920838356018066, "rewards/rejected": -0.07999403774738312, "step": 4670 }, { "epoch": 0.31, "learning_rate": 4.380046520209056e-06, "logits/chosen": -2.3661510944366455, "logits/rejected": -1.9948110580444336, "logps/chosen": -202.7303924560547, "logps/rejected": -186.60891723632812, "loss": 0.6918, "rewards/accuracies": 0.5, "rewards/chosen": -0.024640483781695366, "rewards/margins": 0.07717674970626831, "rewards/rejected": -0.10181725025177002, "step": 4680 }, { "epoch": 0.31, "learning_rate": 4.376277978862936e-06, "logits/chosen": -2.2196907997131348, "logits/rejected": -1.9270433187484741, "logps/chosen": -227.43930053710938, "logps/rejected": -193.55284118652344, "loss": 0.6912, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.014506752602756023, "rewards/margins": 0.06645031273365021, "rewards/rejected": -0.0809570699930191, "step": 4690 }, { "epoch": 0.31, "learning_rate": 4.372499649931774e-06, "logits/chosen": -2.1691818237304688, "logits/rejected": -2.317289113998413, "logps/chosen": -212.4759063720703, "logps/rejected": -234.37451171875, "loss": 0.6854, "rewards/accuracies": 0.75, "rewards/chosen": -0.0338435061275959, "rewards/margins": 0.13942193984985352, "rewards/rejected": -0.1732654571533203, "step": 4700 }, { "epoch": 0.31, "eval_logits/chosen": -2.31925368309021, "eval_logits/rejected": -2.1311213970184326, "eval_logps/chosen": -235.55807495117188, "eval_logps/rejected": -224.37208557128906, "eval_loss": 0.6902915835380554, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": -0.03553127497434616, "eval_rewards/margins": 0.09207045286893845, "eval_rewards/rejected": -0.1276017278432846, "eval_runtime": 714.2184, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 4700 }, { "epoch": 0.31, "learning_rate": 4.368711553125185e-06, "logits/chosen": -2.5005226135253906, "logits/rejected": -2.265688180923462, "logps/chosen": -281.1730041503906, "logps/rejected": -229.6286163330078, "loss": 0.6925, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0398295521736145, "rewards/margins": 0.06899070739746094, "rewards/rejected": -0.10882025957107544, "step": 4710 }, { "epoch": 0.31, "learning_rate": 4.364913708203734e-06, "logits/chosen": -2.398655891418457, "logits/rejected": -2.0503017902374268, "logps/chosen": -289.25067138671875, "logps/rejected": -222.9422149658203, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04354417696595192, "rewards/margins": 0.09108763188123703, "rewards/rejected": -0.13463182747364044, "step": 4720 }, { "epoch": 0.31, "learning_rate": 4.361106134978844e-06, "logits/chosen": -2.277704954147339, "logits/rejected": -2.071712017059326, "logps/chosen": -272.6820068359375, "logps/rejected": -265.93670654296875, "loss": 0.6922, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.022515593096613884, "rewards/margins": 0.06205441802740097, "rewards/rejected": -0.08457001298666, "step": 4730 }, { "epoch": 0.31, "learning_rate": 4.357288853312681e-06, "logits/chosen": -2.3490684032440186, "logits/rejected": -2.26945161819458, "logps/chosen": -287.81549072265625, "logps/rejected": -287.1567687988281, "loss": 0.692, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04337679222226143, "rewards/margins": 0.04078169912099838, "rewards/rejected": -0.08415848016738892, "step": 4740 }, { "epoch": 0.31, "learning_rate": 4.353461883118056e-06, "logits/chosen": -2.249939203262329, "logits/rejected": -2.112075090408325, "logps/chosen": -232.1291961669922, "logps/rejected": -214.3981475830078, "loss": 0.6927, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.02838616445660591, "rewards/margins": 0.03420080989599228, "rewards/rejected": -0.06258697807788849, "step": 4750 }, { "epoch": 0.31, "learning_rate": 4.34962524435832e-06, "logits/chosen": -2.1387410163879395, "logits/rejected": -2.0375964641571045, "logps/chosen": -221.3422393798828, "logps/rejected": -193.26048278808594, "loss": 0.6922, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.016618115827441216, "rewards/margins": 0.08751632273197174, "rewards/rejected": -0.10413442552089691, "step": 4760 }, { "epoch": 0.31, "learning_rate": 4.34577895704726e-06, "logits/chosen": -2.372318744659424, "logits/rejected": -2.1986355781555176, "logps/chosen": -263.10650634765625, "logps/rejected": -244.01565551757812, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.01492463331669569, "rewards/margins": 0.06633206456899643, "rewards/rejected": -0.08125670254230499, "step": 4770 }, { "epoch": 0.31, "learning_rate": 4.3419230412489954e-06, "logits/chosen": -2.470191478729248, "logits/rejected": -2.233651638031006, "logps/chosen": -291.9186096191406, "logps/rejected": -221.01748657226562, "loss": 0.6918, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0255076102912426, "rewards/margins": 0.051627278327941895, "rewards/rejected": -0.07713489234447479, "step": 4780 }, { "epoch": 0.31, "learning_rate": 4.338057517077872e-06, "logits/chosen": -2.417341709136963, "logits/rejected": -2.014641284942627, "logps/chosen": -193.8696746826172, "logps/rejected": -168.1341094970703, "loss": 0.6813, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.0017154158558696508, "rewards/margins": 0.17616704106330872, "rewards/rejected": -0.17445163428783417, "step": 4790 }, { "epoch": 0.31, "learning_rate": 4.334182404698356e-06, "logits/chosen": -2.4133429527282715, "logits/rejected": -1.977574110031128, "logps/chosen": -234.8191680908203, "logps/rejected": -160.61514282226562, "loss": 0.6918, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04712152108550072, "rewards/margins": 0.04195799678564072, "rewards/rejected": -0.08907952159643173, "step": 4800 }, { "epoch": 0.31, "eval_logits/chosen": -2.3064463138580322, "eval_logits/rejected": -2.1199657917022705, "eval_logps/chosen": -233.79531860351562, "eval_logps/rejected": -220.7675323486328, "eval_loss": 0.6901711225509644, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": -0.017903409898281097, "eval_rewards/margins": 0.07365269213914871, "eval_rewards/rejected": -0.09155610203742981, "eval_runtime": 711.7896, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 4800 }, { "epoch": 0.31, "learning_rate": 4.330297724324933e-06, "logits/chosen": -2.5674805641174316, "logits/rejected": -2.029761791229248, "logps/chosen": -308.3761291503906, "logps/rejected": -214.9816436767578, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0003527544322423637, "rewards/margins": 0.07805721461772919, "rewards/rejected": -0.07770445942878723, "step": 4810 }, { "epoch": 0.32, "learning_rate": 4.326403496221999e-06, "logits/chosen": -2.235084056854248, "logits/rejected": -2.138692617416382, "logps/chosen": -163.4996337890625, "logps/rejected": -143.01316833496094, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": -0.020706117153167725, "rewards/margins": 0.06293468922376633, "rewards/rejected": -0.08364080637693405, "step": 4820 }, { "epoch": 0.32, "learning_rate": 4.322499740703755e-06, "logits/chosen": -2.19960618019104, "logits/rejected": -2.30405592918396, "logps/chosen": -193.19754028320312, "logps/rejected": -224.2078094482422, "loss": 0.6902, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.010983394458889961, "rewards/margins": 0.06486980617046356, "rewards/rejected": -0.07585321366786957, "step": 4830 }, { "epoch": 0.32, "learning_rate": 4.318586478134101e-06, "logits/chosen": -2.2257297039031982, "logits/rejected": -2.188767671585083, "logps/chosen": -192.26211547851562, "logps/rejected": -158.5824737548828, "loss": 0.6894, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004053809680044651, "rewards/margins": 0.06840833276510239, "rewards/rejected": -0.06435452401638031, "step": 4840 }, { "epoch": 0.32, "learning_rate": 4.314663728926534e-06, "logits/chosen": -2.4708011150360107, "logits/rejected": -2.215599536895752, "logps/chosen": -259.12109375, "logps/rejected": -254.83389282226562, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": -0.022453729063272476, "rewards/margins": 0.0629110336303711, "rewards/rejected": -0.08536475896835327, "step": 4850 }, { "epoch": 0.32, "learning_rate": 4.310731513544033e-06, "logits/chosen": -2.26763653755188, "logits/rejected": -2.0921549797058105, "logps/chosen": -245.0206756591797, "logps/rejected": -206.82406616210938, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.021629411727190018, "rewards/margins": 0.07882945239543915, "rewards/rejected": -0.10045886039733887, "step": 4860 }, { "epoch": 0.32, "learning_rate": 4.30678985249896e-06, "logits/chosen": -2.255072832107544, "logits/rejected": -2.1915435791015625, "logps/chosen": -159.80569458007812, "logps/rejected": -188.56588745117188, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.026556584984064102, "rewards/margins": 0.10496687889099121, "rewards/rejected": -0.13152346014976501, "step": 4870 }, { "epoch": 0.32, "learning_rate": 4.302838766352952e-06, "logits/chosen": -2.2494466304779053, "logits/rejected": -2.029052495956421, "logps/chosen": -259.2070007324219, "logps/rejected": -231.5312042236328, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03614037483930588, "rewards/margins": 0.08115691691637039, "rewards/rejected": -0.11729729175567627, "step": 4880 }, { "epoch": 0.32, "learning_rate": 4.298878275716806e-06, "logits/chosen": -2.175429582595825, "logits/rejected": -2.159177780151367, "logps/chosen": -196.0753936767578, "logps/rejected": -203.73773193359375, "loss": 0.6872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04195953160524368, "rewards/margins": 0.10459339618682861, "rewards/rejected": -0.1465529352426529, "step": 4890 }, { "epoch": 0.32, "learning_rate": 4.294908401250386e-06, "logits/chosen": -2.3999128341674805, "logits/rejected": -1.9628560543060303, "logps/chosen": -217.9789276123047, "logps/rejected": -182.8585968017578, "loss": 0.6886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.032358746975660324, "rewards/margins": 0.0941200703382492, "rewards/rejected": -0.12647880613803864, "step": 4900 }, { "epoch": 0.32, "eval_logits/chosen": -2.284270763397217, "eval_logits/rejected": -2.0991475582122803, "eval_logps/chosen": -234.08592224121094, "eval_logps/rejected": -222.58131408691406, "eval_loss": 0.6902449131011963, "eval_rewards/accuracies": 0.6424999833106995, "eval_rewards/chosen": -0.020809680223464966, "eval_rewards/margins": 0.08888448029756546, "eval_rewards/rejected": -0.10969416052103043, "eval_runtime": 710.5969, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 4900 }, { "epoch": 0.32, "learning_rate": 4.290929163662498e-06, "logits/chosen": -2.1393580436706543, "logits/rejected": -1.9254634380340576, "logps/chosen": -271.26580810546875, "logps/rejected": -221.15762329101562, "loss": 0.6891, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.015305581502616405, "rewards/margins": 0.076082743704319, "rewards/rejected": -0.09138831496238708, "step": 4910 }, { "epoch": 0.32, "learning_rate": 4.286940583710796e-06, "logits/chosen": -2.330704689025879, "logits/rejected": -2.207934856414795, "logps/chosen": -294.7912292480469, "logps/rejected": -248.0767364501953, "loss": 0.6919, "rewards/accuracies": 0.75, "rewards/chosen": -0.020425736904144287, "rewards/margins": 0.09915411472320557, "rewards/rejected": -0.11957985162734985, "step": 4920 }, { "epoch": 0.32, "learning_rate": 4.282942682201667e-06, "logits/chosen": -2.2037787437438965, "logits/rejected": -1.929842233657837, "logps/chosen": -255.17098999023438, "logps/rejected": -224.1762237548828, "loss": 0.6921, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0306318998336792, "rewards/margins": 0.08591778576374054, "rewards/rejected": -0.11654969304800034, "step": 4930 }, { "epoch": 0.32, "learning_rate": 4.278935479990123e-06, "logits/chosen": -2.479978322982788, "logits/rejected": -2.227200508117676, "logps/chosen": -207.30966186523438, "logps/rejected": -171.01812744140625, "loss": 0.6892, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.02646293118596077, "rewards/margins": 0.06679949164390564, "rewards/rejected": -0.09326241165399551, "step": 4940 }, { "epoch": 0.32, "learning_rate": 4.274918997979695e-06, "logits/chosen": -2.2375760078430176, "logits/rejected": -2.2690727710723877, "logps/chosen": -197.79953002929688, "logps/rejected": -205.44857788085938, "loss": 0.6919, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0416448749601841, "rewards/margins": 0.05913674831390381, "rewards/rejected": -0.10078161954879761, "step": 4950 }, { "epoch": 0.32, "learning_rate": 4.270893257122319e-06, "logits/chosen": -2.1356120109558105, "logits/rejected": -1.9105371236801147, "logps/chosen": -230.14260864257812, "logps/rejected": -272.1700744628906, "loss": 0.6865, "rewards/accuracies": 0.625, "rewards/chosen": -0.014206337742507458, "rewards/margins": 0.12518110871315002, "rewards/rejected": -0.13938744366168976, "step": 4960 }, { "epoch": 0.33, "learning_rate": 4.266858278418232e-06, "logits/chosen": -2.1323647499084473, "logits/rejected": -1.8933664560317993, "logps/chosen": -238.7414093017578, "logps/rejected": -223.8094940185547, "loss": 0.6888, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.021372055634856224, "rewards/margins": 0.05419282987713814, "rewards/rejected": -0.07556488364934921, "step": 4970 }, { "epoch": 0.33, "learning_rate": 4.26281408291586e-06, "logits/chosen": -2.3737692832946777, "logits/rejected": -2.0658111572265625, "logps/chosen": -242.1832275390625, "logps/rejected": -223.305419921875, "loss": 0.6893, "rewards/accuracies": 0.75, "rewards/chosen": 0.0015409494517371058, "rewards/margins": 0.10654549300670624, "rewards/rejected": -0.10500454902648926, "step": 4980 }, { "epoch": 0.33, "learning_rate": 4.258760691711706e-06, "logits/chosen": -2.291581392288208, "logits/rejected": -2.1656148433685303, "logps/chosen": -198.389404296875, "logps/rejected": -202.20599365234375, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.009278281591832638, "rewards/margins": 0.08409412950277328, "rewards/rejected": -0.0933724194765091, "step": 4990 }, { "epoch": 0.33, "learning_rate": 4.254698125950247e-06, "logits/chosen": -2.530463695526123, "logits/rejected": -2.2810826301574707, "logps/chosen": -300.74664306640625, "logps/rejected": -259.6216735839844, "loss": 0.6923, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0026790399570018053, "rewards/margins": 0.05540703609585762, "rewards/rejected": -0.05272800475358963, "step": 5000 }, { "epoch": 0.33, "eval_logits/chosen": -2.2863876819610596, "eval_logits/rejected": -2.1009578704833984, "eval_logps/chosen": -232.66943359375, "eval_logps/rejected": -220.42218017578125, "eval_loss": 0.6900946497917175, "eval_rewards/accuracies": 0.6269999742507935, "eval_rewards/chosen": -0.006644845940172672, "eval_rewards/margins": 0.08145791292190552, "eval_rewards/rejected": -0.08810276538133621, "eval_runtime": 708.8089, "eval_samples_per_second": 2.822, "eval_steps_per_second": 1.411, "step": 5000 }, { "epoch": 0.33, "learning_rate": 4.250626406823815e-06, "logits/chosen": -2.3481221199035645, "logits/rejected": -2.086394786834717, "logps/chosen": -217.6735076904297, "logps/rejected": -247.91268920898438, "loss": 0.6884, "rewards/accuracies": 0.625, "rewards/chosen": -0.008107764646410942, "rewards/margins": 0.14061132073402405, "rewards/rejected": -0.14871908724308014, "step": 5010 }, { "epoch": 0.33, "learning_rate": 4.246545555572489e-06, "logits/chosen": -2.260010242462158, "logits/rejected": -2.139444351196289, "logps/chosen": -153.61056518554688, "logps/rejected": -189.370849609375, "loss": 0.688, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.005855654366314411, "rewards/margins": 0.10808217525482178, "rewards/rejected": -0.10222651809453964, "step": 5020 }, { "epoch": 0.33, "learning_rate": 4.242455593483992e-06, "logits/chosen": -2.340317964553833, "logits/rejected": -2.138221263885498, "logps/chosen": -218.4593048095703, "logps/rejected": -174.04708862304688, "loss": 0.692, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.006946141831576824, "rewards/margins": 0.052972644567489624, "rewards/rejected": -0.04602649435400963, "step": 5030 }, { "epoch": 0.33, "learning_rate": 4.238356541893567e-06, "logits/chosen": -2.2381134033203125, "logits/rejected": -2.103583812713623, "logps/chosen": -190.62313842773438, "logps/rejected": -182.2062225341797, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0049001253210008144, "rewards/margins": 0.07125677168369293, "rewards/rejected": -0.07615689188241959, "step": 5040 }, { "epoch": 0.33, "learning_rate": 4.234248422183876e-06, "logits/chosen": -2.0986828804016113, "logits/rejected": -2.2984180450439453, "logps/chosen": -241.3805694580078, "logps/rejected": -242.21878051757812, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.01554117538034916, "rewards/margins": 0.0539456307888031, "rewards/rejected": -0.03840445727109909, "step": 5050 }, { "epoch": 0.33, "learning_rate": 4.230131255784884e-06, "logits/chosen": -2.5071334838867188, "logits/rejected": -2.1970627307891846, "logps/chosen": -253.6757354736328, "logps/rejected": -244.3105926513672, "loss": 0.688, "rewards/accuracies": 0.75, "rewards/chosen": 0.04207398369908333, "rewards/margins": 0.09431316703557968, "rewards/rejected": -0.05223918706178665, "step": 5060 }, { "epoch": 0.33, "learning_rate": 4.226005064173748e-06, "logits/chosen": -2.299450159072876, "logits/rejected": -2.1493353843688965, "logps/chosen": -261.6854553222656, "logps/rejected": -286.0189208984375, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": 0.029588108882308006, "rewards/margins": 0.058764077723026276, "rewards/rejected": -0.02917597070336342, "step": 5070 }, { "epoch": 0.33, "learning_rate": 4.2218698688747035e-06, "logits/chosen": -2.133448362350464, "logits/rejected": -1.9765691757202148, "logps/chosen": -233.8523712158203, "logps/rejected": -196.45217895507812, "loss": 0.6907, "rewards/accuracies": 0.75, "rewards/chosen": -0.01143039483577013, "rewards/margins": 0.07730044424533844, "rewards/rejected": -0.0887308269739151, "step": 5080 }, { "epoch": 0.33, "learning_rate": 4.217725691458957e-06, "logits/chosen": -2.4555492401123047, "logits/rejected": -2.2806808948516846, "logps/chosen": -183.96484375, "logps/rejected": -221.15011596679688, "loss": 0.6875, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0025927810929715633, "rewards/margins": 0.10239323228597641, "rewards/rejected": -0.09980045258998871, "step": 5090 }, { "epoch": 0.33, "learning_rate": 4.213572553544565e-06, "logits/chosen": -2.343311071395874, "logits/rejected": -2.1145055294036865, "logps/chosen": -239.4435577392578, "logps/rejected": -242.26553344726562, "loss": 0.6914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.015282916836440563, "rewards/margins": 0.10186527669429779, "rewards/rejected": -0.08658237755298615, "step": 5100 }, { "epoch": 0.33, "eval_logits/chosen": -2.304870843887329, "eval_logits/rejected": -2.1186633110046387, "eval_logps/chosen": -232.49884033203125, "eval_logps/rejected": -220.59129333496094, "eval_loss": 0.6902390122413635, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.004938941914588213, "eval_rewards/margins": 0.08485515415668488, "eval_rewards/rejected": -0.0897940918803215, "eval_runtime": 709.3713, "eval_samples_per_second": 2.819, "eval_steps_per_second": 1.41, "step": 5100 }, { "epoch": 0.33, "learning_rate": 4.209410476796331e-06, "logits/chosen": -2.2306551933288574, "logits/rejected": -2.1737866401672363, "logps/chosen": -172.79660034179688, "logps/rejected": -172.89422607421875, "loss": 0.6879, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.017051298171281815, "rewards/margins": 0.10086864233016968, "rewards/rejected": -0.11791994422674179, "step": 5110 }, { "epoch": 0.33, "learning_rate": 4.205239482925686e-06, "logits/chosen": -2.1063780784606934, "logits/rejected": -2.153102397918701, "logps/chosen": -188.4702911376953, "logps/rejected": -215.7820587158203, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.006549468729645014, "rewards/margins": 0.05185345560312271, "rewards/rejected": -0.05840292572975159, "step": 5120 }, { "epoch": 0.34, "learning_rate": 4.201059593690577e-06, "logits/chosen": -2.3719985485076904, "logits/rejected": -2.2848258018493652, "logps/chosen": -225.9248046875, "logps/rejected": -207.27273559570312, "loss": 0.6909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0033702123910188675, "rewards/margins": 0.06776970624923706, "rewards/rejected": -0.07113991677761078, "step": 5130 }, { "epoch": 0.34, "learning_rate": 4.196870830895354e-06, "logits/chosen": -2.1750078201293945, "logits/rejected": -2.1885952949523926, "logps/chosen": -259.3610534667969, "logps/rejected": -315.05426025390625, "loss": 0.6913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0161347184330225, "rewards/margins": 0.06185835599899292, "rewards/rejected": -0.07799308001995087, "step": 5140 }, { "epoch": 0.34, "learning_rate": 4.192673216390657e-06, "logits/chosen": -2.3547465801239014, "logits/rejected": -2.0885729789733887, "logps/chosen": -235.29983520507812, "logps/rejected": -204.89700317382812, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02326892875134945, "rewards/margins": 0.07924740761518478, "rewards/rejected": -0.10251633077859879, "step": 5150 }, { "epoch": 0.34, "learning_rate": 4.188466772073296e-06, "logits/chosen": -2.447526454925537, "logits/rejected": -2.1424593925476074, "logps/chosen": -224.17477416992188, "logps/rejected": -207.6041259765625, "loss": 0.6906, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03650229796767235, "rewards/margins": 0.04190974682569504, "rewards/rejected": -0.07841204106807709, "step": 5160 }, { "epoch": 0.34, "learning_rate": 4.184251519886148e-06, "logits/chosen": -2.190013885498047, "logits/rejected": -2.2769131660461426, "logps/chosen": -201.77218627929688, "logps/rejected": -237.6571807861328, "loss": 0.6888, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.056646548211574554, "rewards/margins": 0.07229969650506973, "rewards/rejected": -0.1289462298154831, "step": 5170 }, { "epoch": 0.34, "learning_rate": 4.180027481818033e-06, "logits/chosen": -2.3080639839172363, "logits/rejected": -2.2970948219299316, "logps/chosen": -269.02783203125, "logps/rejected": -236.8043975830078, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.023297477513551712, "rewards/margins": 0.05610079690814018, "rewards/rejected": -0.0793982669711113, "step": 5180 }, { "epoch": 0.34, "learning_rate": 4.175794679903602e-06, "logits/chosen": -2.364243745803833, "logits/rejected": -2.1234567165374756, "logps/chosen": -227.66976928710938, "logps/rejected": -164.52684020996094, "loss": 0.6917, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.00028926803497597575, "rewards/margins": 0.09214087575674057, "rewards/rejected": -0.09243014454841614, "step": 5190 }, { "epoch": 0.34, "learning_rate": 4.171553136223222e-06, "logits/chosen": -2.3197431564331055, "logits/rejected": -2.314145803451538, "logps/chosen": -269.05267333984375, "logps/rejected": -290.14276123046875, "loss": 0.6895, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.017318129539489746, "rewards/margins": 0.10380220413208008, "rewards/rejected": -0.12112033367156982, "step": 5200 }, { "epoch": 0.34, "eval_logits/chosen": -2.323685884475708, "eval_logits/rejected": -2.1359636783599854, "eval_logps/chosen": -234.24876403808594, "eval_logps/rejected": -221.4422149658203, "eval_loss": 0.690197765827179, "eval_rewards/accuracies": 0.6294999718666077, "eval_rewards/chosen": -0.022438107058405876, "eval_rewards/margins": 0.0758652612566948, "eval_rewards/rejected": -0.09830336272716522, "eval_runtime": 712.3382, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 5200 }, { "epoch": 0.34, "learning_rate": 4.167302872902865e-06, "logits/chosen": -2.3222103118896484, "logits/rejected": -2.2121872901916504, "logps/chosen": -255.7134246826172, "logps/rejected": -254.5579071044922, "loss": 0.6889, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.042452942579984665, "rewards/margins": 0.10782526433467865, "rewards/rejected": -0.1502782106399536, "step": 5210 }, { "epoch": 0.34, "learning_rate": 4.163043912113985e-06, "logits/chosen": -2.354125499725342, "logits/rejected": -2.1191887855529785, "logps/chosen": -261.3983459472656, "logps/rejected": -233.1907196044922, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02358170412480831, "rewards/margins": 0.05787007883191109, "rewards/rejected": -0.08145178854465485, "step": 5220 }, { "epoch": 0.34, "learning_rate": 4.15877627607341e-06, "logits/chosen": -2.1449849605560303, "logits/rejected": -2.0459647178649902, "logps/chosen": -218.0535430908203, "logps/rejected": -200.9430694580078, "loss": 0.6917, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.014587024226784706, "rewards/margins": 0.0711187794804573, "rewards/rejected": -0.08570580184459686, "step": 5230 }, { "epoch": 0.34, "learning_rate": 4.154499987043217e-06, "logits/chosen": -2.3814520835876465, "logits/rejected": -2.1603853702545166, "logps/chosen": -225.3390655517578, "logps/rejected": -213.6746826171875, "loss": 0.6887, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.011354709044098854, "rewards/margins": 0.10515379905700684, "rewards/rejected": -0.11650850623846054, "step": 5240 }, { "epoch": 0.34, "learning_rate": 4.150215067330625e-06, "logits/chosen": -2.206449508666992, "logits/rejected": -2.1436514854431152, "logps/chosen": -211.77523803710938, "logps/rejected": -235.5076446533203, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02726421132683754, "rewards/margins": 0.09695123136043549, "rewards/rejected": -0.12421544641256332, "step": 5250 }, { "epoch": 0.34, "learning_rate": 4.145921539287876e-06, "logits/chosen": -2.2395777702331543, "logits/rejected": -1.96908438205719, "logps/chosen": -193.36337280273438, "logps/rejected": -183.28408813476562, "loss": 0.6898, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.027707424014806747, "rewards/margins": 0.09924031049013138, "rewards/rejected": -0.12694773077964783, "step": 5260 }, { "epoch": 0.34, "learning_rate": 4.141619425312115e-06, "logits/chosen": -2.3244357109069824, "logits/rejected": -1.956451654434204, "logps/chosen": -211.45498657226562, "logps/rejected": -193.4413299560547, "loss": 0.6913, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.028037140145897865, "rewards/margins": 0.06428249180316925, "rewards/rejected": -0.09231962263584137, "step": 5270 }, { "epoch": 0.35, "learning_rate": 4.1373087478452735e-06, "logits/chosen": -2.4381213188171387, "logits/rejected": -2.04194974899292, "logps/chosen": -223.4126739501953, "logps/rejected": -183.11099243164062, "loss": 0.6853, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.01827925816178322, "rewards/margins": 0.14312221109867096, "rewards/rejected": -0.12484294176101685, "step": 5280 }, { "epoch": 0.35, "learning_rate": 4.132989529373959e-06, "logits/chosen": -2.352229595184326, "logits/rejected": -1.917790174484253, "logps/chosen": -258.89263916015625, "logps/rejected": -188.7851104736328, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.029797937721014023, "rewards/margins": 0.08890985697507858, "rewards/rejected": -0.1187077984213829, "step": 5290 }, { "epoch": 0.35, "learning_rate": 4.128661792429331e-06, "logits/chosen": -2.3550148010253906, "logits/rejected": -2.1839187145233154, "logps/chosen": -257.23114013671875, "logps/rejected": -266.33551025390625, "loss": 0.6928, "rewards/accuracies": 0.5, "rewards/chosen": -0.034605689346790314, "rewards/margins": 0.042079828679561615, "rewards/rejected": -0.07668552547693253, "step": 5300 }, { "epoch": 0.35, "eval_logits/chosen": -2.310957908630371, "eval_logits/rejected": -2.124268054962158, "eval_logps/chosen": -235.38356018066406, "eval_logps/rejected": -223.17697143554688, "eval_loss": 0.6903428435325623, "eval_rewards/accuracies": 0.6299999952316284, "eval_rewards/chosen": -0.03378620371222496, "eval_rewards/margins": 0.08186446130275726, "eval_rewards/rejected": -0.11565067619085312, "eval_runtime": 710.7294, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 5300 }, { "epoch": 0.35, "learning_rate": 4.124325559586985e-06, "logits/chosen": -2.0685927867889404, "logits/rejected": -2.1043925285339355, "logps/chosen": -195.6849822998047, "logps/rejected": -210.3332977294922, "loss": 0.6934, "rewards/accuracies": 0.5, "rewards/chosen": -0.07628266513347626, "rewards/margins": 0.017243212088942528, "rewards/rejected": -0.09352587163448334, "step": 5310 }, { "epoch": 0.35, "learning_rate": 4.119980853466835e-06, "logits/chosen": -2.27421236038208, "logits/rejected": -1.881087064743042, "logps/chosen": -213.4599609375, "logps/rejected": -195.97486877441406, "loss": 0.6895, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.04305556043982506, "rewards/margins": 0.09629078209400177, "rewards/rejected": -0.13934634625911713, "step": 5320 }, { "epoch": 0.35, "learning_rate": 4.115627696732997e-06, "logits/chosen": -2.199984550476074, "logits/rejected": -2.0089831352233887, "logps/chosen": -194.8700714111328, "logps/rejected": -181.50704956054688, "loss": 0.6921, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0345582440495491, "rewards/margins": 0.05957914516329765, "rewards/rejected": -0.09413739293813705, "step": 5330 }, { "epoch": 0.35, "learning_rate": 4.111266112093668e-06, "logits/chosen": -2.3105640411376953, "logits/rejected": -2.106968402862549, "logps/chosen": -206.64205932617188, "logps/rejected": -236.0433349609375, "loss": 0.6889, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03174377605319023, "rewards/margins": 0.1100342720746994, "rewards/rejected": -0.14177805185317993, "step": 5340 }, { "epoch": 0.35, "learning_rate": 4.1068961223010115e-06, "logits/chosen": -2.2916486263275146, "logits/rejected": -1.9959255456924438, "logps/chosen": -273.03662109375, "logps/rejected": -259.06805419921875, "loss": 0.6876, "rewards/accuracies": 0.625, "rewards/chosen": -0.023360053077340126, "rewards/margins": 0.08707686513662338, "rewards/rejected": -0.11043691635131836, "step": 5350 }, { "epoch": 0.35, "learning_rate": 4.102517750151034e-06, "logits/chosen": -2.3448870182037354, "logits/rejected": -2.100322961807251, "logps/chosen": -295.0011901855469, "logps/rejected": -228.78164672851562, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.00043646543053910136, "rewards/margins": 0.05093403905630112, "rewards/rejected": -0.050497572869062424, "step": 5360 }, { "epoch": 0.35, "learning_rate": 4.09813101848347e-06, "logits/chosen": -2.22782564163208, "logits/rejected": -2.258028984069824, "logps/chosen": -216.62051391601562, "logps/rejected": -240.86264038085938, "loss": 0.694, "rewards/accuracies": 0.625, "rewards/chosen": 0.023654133081436157, "rewards/margins": 0.07087962329387665, "rewards/rejected": -0.04722550883889198, "step": 5370 }, { "epoch": 0.35, "learning_rate": 4.093735950181659e-06, "logits/chosen": -2.1826157569885254, "logits/rejected": -2.083122968673706, "logps/chosen": -220.95883178710938, "logps/rejected": -255.71231079101562, "loss": 0.6878, "rewards/accuracies": 0.75, "rewards/chosen": 0.04514528065919876, "rewards/margins": 0.10210150480270386, "rewards/rejected": -0.0569562129676342, "step": 5380 }, { "epoch": 0.35, "learning_rate": 4.0893325681724326e-06, "logits/chosen": -2.2918612957000732, "logits/rejected": -2.234261989593506, "logps/chosen": -258.1329040527344, "logps/rejected": -252.94076538085938, "loss": 0.6906, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0028874489944428205, "rewards/margins": 0.07087242603302002, "rewards/rejected": -0.06798496842384338, "step": 5390 }, { "epoch": 0.35, "learning_rate": 4.084920895425988e-06, "logits/chosen": -2.240539073944092, "logits/rejected": -2.236994981765747, "logps/chosen": -237.4442901611328, "logps/rejected": -259.9585876464844, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.000796229753177613, "rewards/margins": 0.0886940062046051, "rewards/rejected": -0.08949021995067596, "step": 5400 }, { "epoch": 0.35, "eval_logits/chosen": -2.296610116958618, "eval_logits/rejected": -2.1113150119781494, "eval_logps/chosen": -229.67491149902344, "eval_logps/rejected": -216.7387237548828, "eval_loss": 0.6902357339859009, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": 0.023300452157855034, "eval_rewards/margins": 0.07456869632005692, "eval_rewards/rejected": -0.05126824975013733, "eval_runtime": 710.671, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 5400 }, { "epoch": 0.35, "learning_rate": 4.080500954955769e-06, "logits/chosen": -2.2135491371154785, "logits/rejected": -1.9314069747924805, "logps/chosen": -254.9764862060547, "logps/rejected": -243.9836883544922, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": 0.008954535238444805, "rewards/margins": 0.07293415814638138, "rewards/rejected": -0.0639796257019043, "step": 5410 }, { "epoch": 0.35, "learning_rate": 4.076072769818354e-06, "logits/chosen": -2.4696648120880127, "logits/rejected": -2.026599168777466, "logps/chosen": -242.1929473876953, "logps/rejected": -186.80911254882812, "loss": 0.6906, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.015293831005692482, "rewards/margins": 0.06581826508045197, "rewards/rejected": -0.05052444338798523, "step": 5420 }, { "epoch": 0.36, "learning_rate": 4.071636363113323e-06, "logits/chosen": -2.07266902923584, "logits/rejected": -1.993199110031128, "logps/chosen": -264.7987976074219, "logps/rejected": -217.65054321289062, "loss": 0.6902, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.033312998712062836, "rewards/margins": 0.0689430683851242, "rewards/rejected": -0.03563006594777107, "step": 5430 }, { "epoch": 0.36, "learning_rate": 4.067191757983146e-06, "logits/chosen": -2.0450809001922607, "logits/rejected": -2.091036558151245, "logps/chosen": -226.9083709716797, "logps/rejected": -234.5966033935547, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02667292393743992, "rewards/margins": 0.10902222245931625, "rewards/rejected": -0.08234930038452148, "step": 5440 }, { "epoch": 0.36, "learning_rate": 4.062738977613063e-06, "logits/chosen": -2.237396717071533, "logits/rejected": -2.115265130996704, "logps/chosen": -232.11581420898438, "logps/rejected": -191.1370391845703, "loss": 0.689, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.023604759946465492, "rewards/margins": 0.07414443045854568, "rewards/rejected": -0.05053967237472534, "step": 5450 }, { "epoch": 0.36, "learning_rate": 4.058278045230957e-06, "logits/chosen": -2.193748950958252, "logits/rejected": -2.1995644569396973, "logps/chosen": -218.991455078125, "logps/rejected": -224.9091796875, "loss": 0.6927, "rewards/accuracies": 0.5, "rewards/chosen": -0.006245986558496952, "rewards/margins": 0.04330100864171982, "rewards/rejected": -0.049546997994184494, "step": 5460 }, { "epoch": 0.36, "learning_rate": 4.053808984107235e-06, "logits/chosen": -2.3936734199523926, "logits/rejected": -2.0713841915130615, "logps/chosen": -233.24472045898438, "logps/rejected": -198.1108856201172, "loss": 0.6919, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.003764042630791664, "rewards/margins": 0.05159348249435425, "rewards/rejected": -0.05535752698779106, "step": 5470 }, { "epoch": 0.36, "learning_rate": 4.04933181755471e-06, "logits/chosen": -2.3855273723602295, "logits/rejected": -2.308960437774658, "logps/chosen": -206.5284423828125, "logps/rejected": -206.90713500976562, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0005217326688580215, "rewards/margins": 0.0759122297167778, "rewards/rejected": -0.07539048790931702, "step": 5480 }, { "epoch": 0.36, "learning_rate": 4.044846568928477e-06, "logits/chosen": -2.2790474891662598, "logits/rejected": -2.3462178707122803, "logps/chosen": -261.7860107421875, "logps/rejected": -259.61175537109375, "loss": 0.6912, "rewards/accuracies": 0.625, "rewards/chosen": -0.002618322381749749, "rewards/margins": 0.06683683395385742, "rewards/rejected": -0.06945516169071198, "step": 5490 }, { "epoch": 0.36, "learning_rate": 4.040353261625788e-06, "logits/chosen": -2.444617748260498, "logits/rejected": -2.0571980476379395, "logps/chosen": -275.5302734375, "logps/rejected": -246.3635711669922, "loss": 0.6884, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.010149752721190453, "rewards/margins": 0.09463830292224884, "rewards/rejected": -0.08448855578899384, "step": 5500 }, { "epoch": 0.36, "eval_logits/chosen": -2.290515661239624, "eval_logits/rejected": -2.1054341793060303, "eval_logps/chosen": -232.49339294433594, "eval_logps/rejected": -219.36749267578125, "eval_loss": 0.6903730630874634, "eval_rewards/accuracies": 0.6230000257492065, "eval_rewards/chosen": -0.004884431138634682, "eval_rewards/margins": 0.07267154008150101, "eval_rewards/rejected": -0.07755597680807114, "eval_runtime": 709.5644, "eval_samples_per_second": 2.819, "eval_steps_per_second": 1.409, "step": 5500 }, { "epoch": 0.36, "learning_rate": 4.035851919085936e-06, "logits/chosen": -2.2773475646972656, "logits/rejected": -2.1437253952026367, "logps/chosen": -268.17034912109375, "logps/rejected": -197.60702514648438, "loss": 0.688, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.007762535475194454, "rewards/margins": 0.08148452639579773, "rewards/rejected": -0.08924706280231476, "step": 5510 }, { "epoch": 0.36, "learning_rate": 4.031342564790128e-06, "logits/chosen": -2.2170324325561523, "logits/rejected": -2.0734400749206543, "logps/chosen": -204.36358642578125, "logps/rejected": -210.1439666748047, "loss": 0.6871, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.016081126406788826, "rewards/margins": 0.10553131252527237, "rewards/rejected": -0.0894501805305481, "step": 5520 }, { "epoch": 0.36, "learning_rate": 4.026825222261367e-06, "logits/chosen": -2.228926658630371, "logits/rejected": -1.9831037521362305, "logps/chosen": -179.34925842285156, "logps/rejected": -177.38687133789062, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06274209171533585, "rewards/margins": 0.05180812627077103, "rewards/rejected": -0.11455021053552628, "step": 5530 }, { "epoch": 0.36, "learning_rate": 4.022299915064321e-06, "logits/chosen": -2.300727605819702, "logits/rejected": -2.096855878829956, "logps/chosen": -305.073486328125, "logps/rejected": -261.6318054199219, "loss": 0.6916, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.007745129056274891, "rewards/margins": 0.09663836658000946, "rewards/rejected": -0.08889324963092804, "step": 5540 }, { "epoch": 0.36, "learning_rate": 4.017766666805213e-06, "logits/chosen": -2.120983839035034, "logits/rejected": -2.074357271194458, "logps/chosen": -215.736328125, "logps/rejected": -188.95654296875, "loss": 0.689, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02215953730046749, "rewards/margins": 0.08581807464361191, "rewards/rejected": -0.10797761380672455, "step": 5550 }, { "epoch": 0.36, "learning_rate": 4.013225501131684e-06, "logits/chosen": -2.3249385356903076, "logits/rejected": -2.043680191040039, "logps/chosen": -217.39242553710938, "logps/rejected": -197.59507751464844, "loss": 0.691, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.00014842339442111552, "rewards/margins": 0.071754589676857, "rewards/rejected": -0.07190301269292831, "step": 5560 }, { "epoch": 0.36, "learning_rate": 4.008676441732679e-06, "logits/chosen": -2.2676405906677246, "logits/rejected": -1.9487760066986084, "logps/chosen": -207.52322387695312, "logps/rejected": -164.7802734375, "loss": 0.6913, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.012288715690374374, "rewards/margins": 0.05257941409945488, "rewards/rejected": -0.06486812978982925, "step": 5570 }, { "epoch": 0.37, "learning_rate": 4.00411951233832e-06, "logits/chosen": -2.412111520767212, "logits/rejected": -2.0756661891937256, "logps/chosen": -220.82858276367188, "logps/rejected": -184.4314727783203, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": -0.011034145951271057, "rewards/margins": 0.0827646404504776, "rewards/rejected": -0.09379879385232925, "step": 5580 }, { "epoch": 0.37, "learning_rate": 3.999554736719785e-06, "logits/chosen": -2.14727520942688, "logits/rejected": -2.033402919769287, "logps/chosen": -291.1112976074219, "logps/rejected": -254.0940704345703, "loss": 0.6907, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.008655655197799206, "rewards/margins": 0.06500723958015442, "rewards/rejected": -0.056351590901613235, "step": 5590 }, { "epoch": 0.37, "learning_rate": 3.994982138689177e-06, "logits/chosen": -2.4407970905303955, "logits/rejected": -2.242966413497925, "logps/chosen": -236.0352783203125, "logps/rejected": -238.973876953125, "loss": 0.6901, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.013648864813148975, "rewards/margins": 0.047897934913635254, "rewards/rejected": -0.03424907475709915, "step": 5600 }, { "epoch": 0.37, "eval_logits/chosen": -2.2870538234710693, "eval_logits/rejected": -2.1021108627319336, "eval_logps/chosen": -232.2427520751953, "eval_logps/rejected": -219.23269653320312, "eval_loss": 0.6902966499328613, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.0023779442999511957, "eval_rewards/margins": 0.07382997125387192, "eval_rewards/rejected": -0.07620792090892792, "eval_runtime": 710.4152, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.408, "step": 5600 }, { "epoch": 0.37, "learning_rate": 3.990401742099408e-06, "logits/chosen": -2.104093074798584, "logits/rejected": -2.1244332790374756, "logps/chosen": -179.53659057617188, "logps/rejected": -176.4581298828125, "loss": 0.692, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.006734983064234257, "rewards/margins": 0.04270303249359131, "rewards/rejected": -0.04943801462650299, "step": 5610 }, { "epoch": 0.37, "learning_rate": 3.985813570844072e-06, "logits/chosen": -2.2303576469421387, "logits/rejected": -2.086726188659668, "logps/chosen": -297.89227294921875, "logps/rejected": -276.9810485839844, "loss": 0.6915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03505517542362213, "rewards/margins": 0.06640339642763138, "rewards/rejected": -0.10145857185125351, "step": 5620 }, { "epoch": 0.37, "learning_rate": 3.981217648857316e-06, "logits/chosen": -2.308424711227417, "logits/rejected": -2.119347333908081, "logps/chosen": -172.80674743652344, "logps/rejected": -184.7308349609375, "loss": 0.6899, "rewards/accuracies": 0.75, "rewards/chosen": -0.002663122955709696, "rewards/margins": 0.10142280906438828, "rewards/rejected": -0.10408592224121094, "step": 5630 }, { "epoch": 0.37, "learning_rate": 3.97661400011372e-06, "logits/chosen": -2.1583411693573, "logits/rejected": -2.204422950744629, "logps/chosen": -244.61489868164062, "logps/rejected": -238.1432342529297, "loss": 0.6926, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.015875743702054024, "rewards/margins": 0.049512267112731934, "rewards/rejected": -0.0653880164027214, "step": 5640 }, { "epoch": 0.37, "learning_rate": 3.972002648628174e-06, "logits/chosen": -2.1991384029388428, "logits/rejected": -1.8762391805648804, "logps/chosen": -277.7632751464844, "logps/rejected": -236.1746368408203, "loss": 0.6912, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.007324635982513428, "rewards/margins": 0.06103179603815079, "rewards/rejected": -0.06835642457008362, "step": 5650 }, { "epoch": 0.37, "learning_rate": 3.967383618455743e-06, "logits/chosen": -2.3441319465637207, "logits/rejected": -2.178091526031494, "logps/chosen": -231.02035522460938, "logps/rejected": -256.923095703125, "loss": 0.6892, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.039006225764751434, "rewards/margins": 0.07712291181087494, "rewards/rejected": -0.11612913757562637, "step": 5660 }, { "epoch": 0.37, "learning_rate": 3.9627569336915515e-06, "logits/chosen": -2.477116107940674, "logits/rejected": -2.1675541400909424, "logps/chosen": -247.08352661132812, "logps/rejected": -199.73582458496094, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.003792487783357501, "rewards/margins": 0.09328923374414444, "rewards/rejected": -0.08949675410985947, "step": 5670 }, { "epoch": 0.37, "learning_rate": 3.9581226184706555e-06, "logits/chosen": -2.326864719390869, "logits/rejected": -2.435724973678589, "logps/chosen": -193.1772918701172, "logps/rejected": -269.46685791015625, "loss": 0.6899, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.014881017617881298, "rewards/margins": 0.055919043719768524, "rewards/rejected": -0.04103802144527435, "step": 5680 }, { "epoch": 0.37, "learning_rate": 3.953480696967912e-06, "logits/chosen": -1.9913456439971924, "logits/rejected": -2.222503662109375, "logps/chosen": -210.5967254638672, "logps/rejected": -254.0486602783203, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.012251259759068489, "rewards/margins": 0.07006217539310455, "rewards/rejected": -0.0823134332895279, "step": 5690 }, { "epoch": 0.37, "learning_rate": 3.948831193397857e-06, "logits/chosen": -2.2036399841308594, "logits/rejected": -2.187148332595825, "logps/chosen": -164.9729461669922, "logps/rejected": -178.32603454589844, "loss": 0.6906, "rewards/accuracies": 0.5, "rewards/chosen": 0.010424559935927391, "rewards/margins": 0.07527212798595428, "rewards/rejected": -0.06484757363796234, "step": 5700 }, { "epoch": 0.37, "eval_logits/chosen": -2.2823386192321777, "eval_logits/rejected": -2.0973258018493652, "eval_logps/chosen": -230.5282440185547, "eval_logps/rejected": -218.62940979003906, "eval_loss": 0.6900830268859863, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": 0.014767038635909557, "eval_rewards/margins": 0.08494207262992859, "eval_rewards/rejected": -0.07017502933740616, "eval_runtime": 714.2649, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 5700 }, { "epoch": 0.37, "learning_rate": 3.94417413201458e-06, "logits/chosen": -2.185957193374634, "logits/rejected": -2.038264274597168, "logps/chosen": -208.8540496826172, "logps/rejected": -198.67984008789062, "loss": 0.6888, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.027174552902579308, "rewards/margins": 0.07592582702636719, "rewards/rejected": -0.04875127598643303, "step": 5710 }, { "epoch": 0.37, "learning_rate": 3.9395095371115935e-06, "logits/chosen": -2.339968204498291, "logits/rejected": -2.060576915740967, "logps/chosen": -209.5446319580078, "logps/rejected": -207.98910522460938, "loss": 0.6885, "rewards/accuracies": 0.5, "rewards/chosen": 0.031124601140618324, "rewards/margins": 0.09055650979280472, "rewards/rejected": -0.059431903064250946, "step": 5720 }, { "epoch": 0.37, "learning_rate": 3.93483743302171e-06, "logits/chosen": -2.2814464569091797, "logits/rejected": -2.04420804977417, "logps/chosen": -208.9713592529297, "logps/rejected": -190.6579132080078, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03820297122001648, "rewards/margins": 0.0542188361287117, "rewards/rejected": -0.016015859320759773, "step": 5730 }, { "epoch": 0.38, "learning_rate": 3.930157844116913e-06, "logits/chosen": -2.1364564895629883, "logits/rejected": -2.0696628093719482, "logps/chosen": -202.4792938232422, "logps/rejected": -195.42135620117188, "loss": 0.69, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.07277990877628326, "rewards/margins": 0.07660780847072601, "rewards/rejected": -0.003827892942354083, "step": 5740 }, { "epoch": 0.38, "learning_rate": 3.925470794808229e-06, "logits/chosen": -2.2870192527770996, "logits/rejected": -1.9282214641571045, "logps/chosen": -236.171875, "logps/rejected": -211.88504028320312, "loss": 0.6897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.028158491477370262, "rewards/margins": 0.08530527353286743, "rewards/rejected": -0.05714678019285202, "step": 5750 }, { "epoch": 0.38, "learning_rate": 3.920776309545606e-06, "logits/chosen": -2.3560986518859863, "logits/rejected": -2.188704013824463, "logps/chosen": -147.86883544921875, "logps/rejected": -148.76467895507812, "loss": 0.6909, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.04838157445192337, "rewards/margins": 0.07728347182273865, "rewards/rejected": -0.028901899233460426, "step": 5760 }, { "epoch": 0.38, "learning_rate": 3.916074412817778e-06, "logits/chosen": -2.2517282962799072, "logits/rejected": -1.886130690574646, "logps/chosen": -239.67239379882812, "logps/rejected": -250.0625457763672, "loss": 0.6881, "rewards/accuracies": 0.75, "rewards/chosen": 0.03661612793803215, "rewards/margins": 0.09867843240499496, "rewards/rejected": -0.062062300741672516, "step": 5770 }, { "epoch": 0.38, "learning_rate": 3.911365129152139e-06, "logits/chosen": -2.3809988498687744, "logits/rejected": -2.224855899810791, "logps/chosen": -227.54214477539062, "logps/rejected": -230.674560546875, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04891723394393921, "rewards/margins": 0.08672243356704712, "rewards/rejected": -0.03780519217252731, "step": 5780 }, { "epoch": 0.38, "learning_rate": 3.906648483114623e-06, "logits/chosen": -2.2592244148254395, "logits/rejected": -2.1565563678741455, "logps/chosen": -195.49063110351562, "logps/rejected": -174.44102478027344, "loss": 0.6871, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.018330033868551254, "rewards/margins": 0.1153540164232254, "rewards/rejected": -0.09702397882938385, "step": 5790 }, { "epoch": 0.38, "learning_rate": 3.901924499309564e-06, "logits/chosen": -2.198864698410034, "logits/rejected": -2.009559154510498, "logps/chosen": -229.9792022705078, "logps/rejected": -206.246337890625, "loss": 0.69, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.012742845341563225, "rewards/margins": 0.08873111009597778, "rewards/rejected": -0.10147394984960556, "step": 5800 }, { "epoch": 0.38, "eval_logits/chosen": -2.2906556129455566, "eval_logits/rejected": -2.104820966720581, "eval_logps/chosen": -233.96670532226562, "eval_logps/rejected": -222.71261596679688, "eval_loss": 0.6902094483375549, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.019617412239313126, "eval_rewards/margins": 0.09138944000005722, "eval_rewards/rejected": -0.11100686341524124, "eval_runtime": 710.6333, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 5800 }, { "epoch": 0.38, "learning_rate": 3.897193202379575e-06, "logits/chosen": -2.2877416610717773, "logits/rejected": -2.117922306060791, "logps/chosen": -201.8192596435547, "logps/rejected": -193.40383911132812, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.013415491208434105, "rewards/margins": 0.0855737030506134, "rewards/rejected": -0.09898919612169266, "step": 5810 }, { "epoch": 0.38, "learning_rate": 3.8924546170054215e-06, "logits/chosen": -2.2300517559051514, "logits/rejected": -2.1765639781951904, "logps/chosen": -216.3985595703125, "logps/rejected": -204.2929229736328, "loss": 0.6908, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.00490927416831255, "rewards/margins": 0.0736776739358902, "rewards/rejected": -0.07858695089817047, "step": 5820 }, { "epoch": 0.38, "learning_rate": 3.887708767905883e-06, "logits/chosen": -2.492140054702759, "logits/rejected": -2.089107036590576, "logps/chosen": -243.9584503173828, "logps/rejected": -184.98101806640625, "loss": 0.6922, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.001912574516609311, "rewards/margins": 0.0686551108956337, "rewards/rejected": -0.07056768238544464, "step": 5830 }, { "epoch": 0.38, "learning_rate": 3.882955679837636e-06, "logits/chosen": -2.250488758087158, "logits/rejected": -2.1399431228637695, "logps/chosen": -238.8167724609375, "logps/rejected": -247.60751342773438, "loss": 0.6914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.005602418445050716, "rewards/margins": 0.06415437906980515, "rewards/rejected": -0.058551959693431854, "step": 5840 }, { "epoch": 0.38, "learning_rate": 3.878195377595113e-06, "logits/chosen": -2.3308000564575195, "logits/rejected": -2.1495823860168457, "logps/chosen": -235.9189453125, "logps/rejected": -239.89706420898438, "loss": 0.6902, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.00013175308413337916, "rewards/margins": 0.09541679173707962, "rewards/rejected": -0.09554854780435562, "step": 5850 }, { "epoch": 0.38, "learning_rate": 3.873427886010384e-06, "logits/chosen": -2.276776075363159, "logits/rejected": -2.1244194507598877, "logps/chosen": -189.79672241210938, "logps/rejected": -179.958740234375, "loss": 0.6886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.006552155129611492, "rewards/margins": 0.09251097589731216, "rewards/rejected": -0.0859588235616684, "step": 5860 }, { "epoch": 0.38, "learning_rate": 3.868653229953021e-06, "logits/chosen": -2.3475050926208496, "logits/rejected": -2.118560314178467, "logps/chosen": -235.6221923828125, "logps/rejected": -243.8154754638672, "loss": 0.6882, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.015926335006952286, "rewards/margins": 0.10257148742675781, "rewards/rejected": -0.08664515614509583, "step": 5870 }, { "epoch": 0.38, "learning_rate": 3.8638714343299675e-06, "logits/chosen": -2.2526631355285645, "logits/rejected": -2.1661479473114014, "logps/chosen": -218.9933319091797, "logps/rejected": -241.96755981445312, "loss": 0.6876, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.006506229750812054, "rewards/margins": 0.0785847157239914, "rewards/rejected": -0.08509095013141632, "step": 5880 }, { "epoch": 0.39, "learning_rate": 3.859082524085414e-06, "logits/chosen": -2.2656807899475098, "logits/rejected": -1.9120867252349854, "logps/chosen": -273.0644226074219, "logps/rejected": -219.8885498046875, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0072127147577703, "rewards/margins": 0.07063201814889908, "rewards/rejected": -0.07784473150968552, "step": 5890 }, { "epoch": 0.39, "learning_rate": 3.854286524200659e-06, "logits/chosen": -2.423177480697632, "logits/rejected": -2.148200511932373, "logps/chosen": -276.10162353515625, "logps/rejected": -229.82730102539062, "loss": 0.6907, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0031580955255776644, "rewards/margins": 0.04362088814377785, "rewards/rejected": -0.04046279191970825, "step": 5900 }, { "epoch": 0.39, "eval_logits/chosen": -2.2796647548675537, "eval_logits/rejected": -2.094618797302246, "eval_logps/chosen": -231.79417419433594, "eval_logps/rejected": -219.75482177734375, "eval_loss": 0.6900690793991089, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": 0.002107798121869564, "eval_rewards/margins": 0.08353700488805771, "eval_rewards/rejected": -0.08142919838428497, "eval_runtime": 710.6336, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 5900 }, { "epoch": 0.39, "learning_rate": 3.849483459693991e-06, "logits/chosen": -2.361053943634033, "logits/rejected": -2.2258360385894775, "logps/chosen": -208.7968292236328, "logps/rejected": -182.0562744140625, "loss": 0.6855, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.008798656985163689, "rewards/margins": 0.11928577721118927, "rewards/rejected": -0.11048711836338043, "step": 5910 }, { "epoch": 0.39, "learning_rate": 3.844673355620544e-06, "logits/chosen": -2.253052234649658, "logits/rejected": -2.1112794876098633, "logps/chosen": -245.8029022216797, "logps/rejected": -217.4182891845703, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.007700022310018539, "rewards/margins": 0.09900878369808197, "rewards/rejected": -0.09130875021219254, "step": 5920 }, { "epoch": 0.39, "learning_rate": 3.839856237072178e-06, "logits/chosen": -2.0936896800994873, "logits/rejected": -2.091200351715088, "logps/chosen": -185.02664184570312, "logps/rejected": -212.76174926757812, "loss": 0.6887, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.024525459855794907, "rewards/margins": 0.10542996972799301, "rewards/rejected": -0.1299554407596588, "step": 5930 }, { "epoch": 0.39, "learning_rate": 3.8350321291773455e-06, "logits/chosen": -2.0315639972686768, "logits/rejected": -1.9680635929107666, "logps/chosen": -200.049560546875, "logps/rejected": -164.24830627441406, "loss": 0.6924, "rewards/accuracies": 0.625, "rewards/chosen": 0.02472389116883278, "rewards/margins": 0.08395050466060638, "rewards/rejected": -0.0592266209423542, "step": 5940 }, { "epoch": 0.39, "learning_rate": 3.830201057100953e-06, "logits/chosen": -2.3199234008789062, "logits/rejected": -2.351250171661377, "logps/chosen": -190.0390167236328, "logps/rejected": -215.13748168945312, "loss": 0.6889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01733965426683426, "rewards/margins": 0.0960569754242897, "rewards/rejected": -0.07871732115745544, "step": 5950 }, { "epoch": 0.39, "learning_rate": 3.82536304604424e-06, "logits/chosen": -2.111283302307129, "logits/rejected": -2.055291175842285, "logps/chosen": -232.94387817382812, "logps/rejected": -208.7888641357422, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": 0.032203659415245056, "rewards/margins": 0.075811468064785, "rewards/rejected": -0.04360780864953995, "step": 5960 }, { "epoch": 0.39, "learning_rate": 3.8205181212446435e-06, "logits/chosen": -2.488105297088623, "logits/rejected": -2.2576441764831543, "logps/chosen": -266.7863464355469, "logps/rejected": -231.9990997314453, "loss": 0.6912, "rewards/accuracies": 0.625, "rewards/chosen": 0.03772572800517082, "rewards/margins": 0.05616292357444763, "rewards/rejected": -0.01843719184398651, "step": 5970 }, { "epoch": 0.39, "learning_rate": 3.815666307975664e-06, "logits/chosen": -2.29557466506958, "logits/rejected": -2.167210102081299, "logps/chosen": -235.0832061767578, "logps/rejected": -221.5277862548828, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.022346094250679016, "rewards/margins": 0.05626540258526802, "rewards/rejected": -0.033919308334589005, "step": 5980 }, { "epoch": 0.39, "learning_rate": 3.8108076315467346e-06, "logits/chosen": -2.3752923011779785, "logits/rejected": -2.240790367126465, "logps/chosen": -257.206787109375, "logps/rejected": -189.83230590820312, "loss": 0.6912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0007598648080602288, "rewards/margins": 0.0735207051038742, "rewards/rejected": -0.07276083528995514, "step": 5990 }, { "epoch": 0.39, "learning_rate": 3.805942117303093e-06, "logits/chosen": -2.5641138553619385, "logits/rejected": -2.226461887359619, "logps/chosen": -314.4233093261719, "logps/rejected": -274.7497863769531, "loss": 0.6901, "rewards/accuracies": 0.75, "rewards/chosen": 0.016122477129101753, "rewards/margins": 0.06850017607212067, "rewards/rejected": -0.05237768962979317, "step": 6000 }, { "epoch": 0.39, "eval_logits/chosen": -2.311023235321045, "eval_logits/rejected": -2.123633861541748, "eval_logps/chosen": -231.44161987304688, "eval_logps/rejected": -218.17413330078125, "eval_loss": 0.6901270747184753, "eval_rewards/accuracies": 0.6294999718666077, "eval_rewards/chosen": 0.005633320193737745, "eval_rewards/margins": 0.07125571370124817, "eval_rewards/rejected": -0.06562238931655884, "eval_runtime": 710.6252, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 6000 }, { "epoch": 0.39, "learning_rate": 3.8010697906256446e-06, "logits/chosen": -2.1471753120422363, "logits/rejected": -2.135615348815918, "logps/chosen": -210.50021362304688, "logps/rejected": -188.69100952148438, "loss": 0.6953, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.038038115948438644, "rewards/margins": 0.06020959094166756, "rewards/rejected": -0.0982476994395256, "step": 6010 }, { "epoch": 0.39, "learning_rate": 3.7961906769308323e-06, "logits/chosen": -2.19319486618042, "logits/rejected": -2.009164571762085, "logps/chosen": -206.8964385986328, "logps/rejected": -224.5199737548828, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.006612460128962994, "rewards/margins": 0.0754861980676651, "rewards/rejected": -0.08209865540266037, "step": 6020 }, { "epoch": 0.39, "learning_rate": 3.7913048016705028e-06, "logits/chosen": -2.2544267177581787, "logits/rejected": -2.210512638092041, "logps/chosen": -259.06982421875, "logps/rejected": -262.7820739746094, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.008697474375367165, "rewards/margins": 0.057602547109127045, "rewards/rejected": -0.048905082046985626, "step": 6030 }, { "epoch": 0.4, "learning_rate": 3.786412190331775e-06, "logits/chosen": -2.447171211242676, "logits/rejected": -2.118438720703125, "logps/chosen": -201.5836944580078, "logps/rejected": -172.86338806152344, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.012768149375915527, "rewards/margins": 0.08379059284925461, "rewards/rejected": -0.07102244347333908, "step": 6040 }, { "epoch": 0.4, "learning_rate": 3.781512868436906e-06, "logits/chosen": -2.435929775238037, "logits/rejected": -2.2919459342956543, "logps/chosen": -129.0099639892578, "logps/rejected": -143.07064819335938, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03600788861513138, "rewards/margins": 0.0627172440290451, "rewards/rejected": -0.02670934796333313, "step": 6050 }, { "epoch": 0.4, "learning_rate": 3.7766068615431605e-06, "logits/chosen": -2.2673373222351074, "logits/rejected": -2.2142586708068848, "logps/chosen": -254.04931640625, "logps/rejected": -212.45474243164062, "loss": 0.692, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.030377531424164772, "rewards/margins": 0.07040407508611679, "rewards/rejected": -0.04002653807401657, "step": 6060 }, { "epoch": 0.4, "learning_rate": 3.771694195242671e-06, "logits/chosen": -2.4878172874450684, "logits/rejected": -2.041779041290283, "logps/chosen": -297.07598876953125, "logps/rejected": -191.4386444091797, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": 0.021027540788054466, "rewards/margins": 0.06093548983335495, "rewards/rejected": -0.039907947182655334, "step": 6070 }, { "epoch": 0.4, "learning_rate": 3.766774895162314e-06, "logits/chosen": -2.3142666816711426, "logits/rejected": -2.210665225982666, "logps/chosen": -244.95858764648438, "logps/rejected": -193.0878143310547, "loss": 0.6915, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.015811407938599586, "rewards/margins": 0.04264925420284271, "rewards/rejected": -0.026837846264243126, "step": 6080 }, { "epoch": 0.4, "learning_rate": 3.7618489869635666e-06, "logits/chosen": -2.227144479751587, "logits/rejected": -2.172423839569092, "logps/chosen": -247.10653686523438, "logps/rejected": -232.07986450195312, "loss": 0.6924, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.000796070322394371, "rewards/margins": 0.04716240614652634, "rewards/rejected": -0.04795847460627556, "step": 6090 }, { "epoch": 0.4, "learning_rate": 3.756916496342379e-06, "logits/chosen": -2.2466297149658203, "logits/rejected": -2.30493426322937, "logps/chosen": -184.02410888671875, "logps/rejected": -198.5399932861328, "loss": 0.6889, "rewards/accuracies": 0.5, "rewards/chosen": 0.028477992862462997, "rewards/margins": 0.060630954802036285, "rewards/rejected": -0.03215295821428299, "step": 6100 }, { "epoch": 0.4, "eval_logits/chosen": -2.3196463584899902, "eval_logits/rejected": -2.131615161895752, "eval_logps/chosen": -228.611572265625, "eval_logps/rejected": -215.37451171875, "eval_loss": 0.6901247501373291, "eval_rewards/accuracies": 0.6215000152587891, "eval_rewards/chosen": 0.0339338555932045, "eval_rewards/margins": 0.0715600922703743, "eval_rewards/rejected": -0.0376262404024601, "eval_runtime": 710.3418, "eval_samples_per_second": 2.816, "eval_steps_per_second": 1.408, "step": 6100 }, { "epoch": 0.4, "learning_rate": 3.751977449029039e-06, "logits/chosen": -1.9810413122177124, "logits/rejected": -1.983925461769104, "logps/chosen": -258.67047119140625, "logps/rejected": -227.87368774414062, "loss": 0.6919, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.014060038141906261, "rewards/margins": 0.06598879396915436, "rewards/rejected": -0.05192875862121582, "step": 6110 }, { "epoch": 0.4, "learning_rate": 3.747031870788037e-06, "logits/chosen": -2.4974024295806885, "logits/rejected": -2.138140916824341, "logps/chosen": -313.6204833984375, "logps/rejected": -238.707763671875, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03132764250040054, "rewards/margins": 0.07506566494703293, "rewards/rejected": -0.043738026171922684, "step": 6120 }, { "epoch": 0.4, "learning_rate": 3.7420797874179326e-06, "logits/chosen": -2.277357578277588, "logits/rejected": -2.018991708755493, "logps/chosen": -236.95089721679688, "logps/rejected": -186.17581176757812, "loss": 0.6896, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0015857883263379335, "rewards/margins": 0.08113957941532135, "rewards/rejected": -0.07955377548933029, "step": 6130 }, { "epoch": 0.4, "learning_rate": 3.7371212247512167e-06, "logits/chosen": -2.6132965087890625, "logits/rejected": -2.2684082984924316, "logps/chosen": -322.7628173828125, "logps/rejected": -267.7512512207031, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03631674125790596, "rewards/margins": 0.10096652805805206, "rewards/rejected": -0.0646497905254364, "step": 6140 }, { "epoch": 0.4, "learning_rate": 3.7321562086541817e-06, "logits/chosen": -2.3771257400512695, "logits/rejected": -2.2703232765197754, "logps/chosen": -251.13504028320312, "logps/rejected": -260.8123474121094, "loss": 0.6908, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.01845073141157627, "rewards/margins": 0.08887068182229996, "rewards/rejected": -0.07041995227336884, "step": 6150 }, { "epoch": 0.4, "learning_rate": 3.7271847650267834e-06, "logits/chosen": -2.168355703353882, "logits/rejected": -2.0670669078826904, "logps/chosen": -199.58877563476562, "logps/rejected": -204.58006286621094, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": 0.003458759281784296, "rewards/margins": 0.058815740048885345, "rewards/rejected": -0.05535699054598808, "step": 6160 }, { "epoch": 0.4, "learning_rate": 3.7222069198025086e-06, "logits/chosen": -2.1603808403015137, "logits/rejected": -1.9670292139053345, "logps/chosen": -209.85214233398438, "logps/rejected": -197.63839721679688, "loss": 0.6892, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.013096879236400127, "rewards/margins": 0.08378522843122482, "rewards/rejected": -0.09688210487365723, "step": 6170 }, { "epoch": 0.4, "learning_rate": 3.7172226989482353e-06, "logits/chosen": -2.1758627891540527, "logits/rejected": -1.9652674198150635, "logps/chosen": -209.81430053710938, "logps/rejected": -212.7050323486328, "loss": 0.6916, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0015852168435230851, "rewards/margins": 0.05035899206995964, "rewards/rejected": -0.04877377673983574, "step": 6180 }, { "epoch": 0.4, "learning_rate": 3.7122321284641007e-06, "logits/chosen": -2.4331085681915283, "logits/rejected": -1.9999637603759766, "logps/chosen": -363.33984375, "logps/rejected": -262.52734375, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0055319941602647305, "rewards/margins": 0.09677392989397049, "rewards/rejected": -0.10230592638254166, "step": 6190 }, { "epoch": 0.41, "learning_rate": 3.707235234383365e-06, "logits/chosen": -2.3560452461242676, "logits/rejected": -2.0064382553100586, "logps/chosen": -258.49041748046875, "logps/rejected": -178.62417602539062, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.027635321021080017, "rewards/margins": 0.06912614405155182, "rewards/rejected": -0.0414908304810524, "step": 6200 }, { "epoch": 0.41, "eval_logits/chosen": -2.3145864009857178, "eval_logits/rejected": -2.126389265060425, "eval_logps/chosen": -229.6930694580078, "eval_logps/rejected": -217.35781860351562, "eval_loss": 0.689987301826477, "eval_rewards/accuracies": 0.6284999847412109, "eval_rewards/chosen": 0.023118959739804268, "eval_rewards/margins": 0.08057821542024612, "eval_rewards/rejected": -0.05745925009250641, "eval_runtime": 712.4839, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 6200 }, { "epoch": 0.41, "learning_rate": 3.702232042772277e-06, "logits/chosen": -2.1632602214813232, "logits/rejected": -2.1070027351379395, "logps/chosen": -201.1727294921875, "logps/rejected": -189.2860565185547, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": 0.013734388165175915, "rewards/margins": 0.10467977821826935, "rewards/rejected": -0.09094538539648056, "step": 6210 }, { "epoch": 0.41, "learning_rate": 3.6972225797299325e-06, "logits/chosen": -2.2812116146087646, "logits/rejected": -2.31536602973938, "logps/chosen": -251.81900024414062, "logps/rejected": -243.4286651611328, "loss": 0.6881, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.013103622011840343, "rewards/margins": 0.07545242458581924, "rewards/rejected": -0.08855602890253067, "step": 6220 }, { "epoch": 0.41, "learning_rate": 3.692206871388147e-06, "logits/chosen": -2.3902740478515625, "logits/rejected": -1.9351933002471924, "logps/chosen": -232.5216827392578, "logps/rejected": -202.56259155273438, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0013294353848323226, "rewards/margins": 0.1127009242773056, "rewards/rejected": -0.11403036117553711, "step": 6230 }, { "epoch": 0.41, "learning_rate": 3.6871849439113115e-06, "logits/chosen": -2.090280294418335, "logits/rejected": -2.0177419185638428, "logps/chosen": -228.4338836669922, "logps/rejected": -225.20547485351562, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.014769596047699451, "rewards/margins": 0.0631939023733139, "rewards/rejected": -0.048424310982227325, "step": 6240 }, { "epoch": 0.41, "learning_rate": 3.682156823496259e-06, "logits/chosen": -2.3378641605377197, "logits/rejected": -2.049996852874756, "logps/chosen": -209.41159057617188, "logps/rejected": -187.8102569580078, "loss": 0.6914, "rewards/accuracies": 0.75, "rewards/chosen": 0.045023590326309204, "rewards/margins": 0.10764230787754059, "rewards/rejected": -0.06261870265007019, "step": 6250 }, { "epoch": 0.41, "learning_rate": 3.67712253637213e-06, "logits/chosen": -2.363736629486084, "logits/rejected": -2.1265621185302734, "logps/chosen": -291.31158447265625, "logps/rejected": -208.5716552734375, "loss": 0.6895, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.009828880429267883, "rewards/margins": 0.07172620296478271, "rewards/rejected": -0.06189732626080513, "step": 6260 }, { "epoch": 0.41, "learning_rate": 3.672082108800231e-06, "logits/chosen": -2.1761693954467773, "logits/rejected": -1.9752609729766846, "logps/chosen": -216.0406951904297, "logps/rejected": -189.57601928710938, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.022248603403568268, "rewards/margins": 0.07574795186519623, "rewards/rejected": -0.0979965478181839, "step": 6270 }, { "epoch": 0.41, "learning_rate": 3.6670355670739012e-06, "logits/chosen": -2.2948927879333496, "logits/rejected": -2.105381488800049, "logps/chosen": -160.06436157226562, "logps/rejected": -167.32986450195312, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.018890656530857086, "rewards/margins": 0.09904204308986664, "rewards/rejected": -0.08015139400959015, "step": 6280 }, { "epoch": 0.41, "learning_rate": 3.6619829375183745e-06, "logits/chosen": -2.451979160308838, "logits/rejected": -2.256770610809326, "logps/chosen": -223.16323852539062, "logps/rejected": -218.20315551757812, "loss": 0.6887, "rewards/accuracies": 0.75, "rewards/chosen": 0.02839289978146553, "rewards/margins": 0.11110793054103851, "rewards/rejected": -0.08271503448486328, "step": 6290 }, { "epoch": 0.41, "learning_rate": 3.6569242464906427e-06, "logits/chosen": -2.307574987411499, "logits/rejected": -2.1314024925231934, "logps/chosen": -202.07757568359375, "logps/rejected": -233.34414672851562, "loss": 0.6871, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.05651476979255676, "rewards/margins": 0.10217426717281342, "rewards/rejected": -0.04565950110554695, "step": 6300 }, { "epoch": 0.41, "eval_logits/chosen": -2.3068814277648926, "eval_logits/rejected": -2.120007276535034, "eval_logps/chosen": -227.68896484375, "eval_logps/rejected": -215.39700317382812, "eval_loss": 0.6900160312652588, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": 0.04315978288650513, "eval_rewards/margins": 0.0810108482837677, "eval_rewards/rejected": -0.03785106539726257, "eval_runtime": 711.6068, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 6300 }, { "epoch": 0.41, "learning_rate": 3.6518595203793156e-06, "logits/chosen": -2.1580138206481934, "logits/rejected": -2.1727097034454346, "logps/chosen": -251.7514190673828, "logps/rejected": -275.81744384765625, "loss": 0.6906, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.06962844729423523, "rewards/margins": 0.10591252893209457, "rewards/rejected": -0.03628408536314964, "step": 6310 }, { "epoch": 0.41, "learning_rate": 3.646788785604485e-06, "logits/chosen": -2.288708209991455, "logits/rejected": -2.1455512046813965, "logps/chosen": -199.6431884765625, "logps/rejected": -203.20138549804688, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": 0.060151923447847366, "rewards/margins": 0.05803212523460388, "rewards/rejected": 0.002119800541549921, "step": 6320 }, { "epoch": 0.41, "learning_rate": 3.641712068617588e-06, "logits/chosen": -2.30918550491333, "logits/rejected": -2.1908669471740723, "logps/chosen": -255.9428253173828, "logps/rejected": -208.373046875, "loss": 0.693, "rewards/accuracies": 0.625, "rewards/chosen": 0.054172348231077194, "rewards/margins": 0.05974752828478813, "rewards/rejected": -0.005575177259743214, "step": 6330 }, { "epoch": 0.41, "learning_rate": 3.6366293959012673e-06, "logits/chosen": -2.25718355178833, "logits/rejected": -2.020131826400757, "logps/chosen": -176.4119415283203, "logps/rejected": -165.9830780029297, "loss": 0.6883, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.04790060594677925, "rewards/margins": 0.09094378352165222, "rewards/rejected": -0.04304318130016327, "step": 6340 }, { "epoch": 0.42, "learning_rate": 3.631540793969233e-06, "logits/chosen": -2.403151035308838, "logits/rejected": -2.379413604736328, "logps/chosen": -183.47628784179688, "logps/rejected": -190.08969116210938, "loss": 0.6908, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.051414500921964645, "rewards/margins": 0.04935113340616226, "rewards/rejected": 0.002063371241092682, "step": 6350 }, { "epoch": 0.42, "learning_rate": 3.626446289366127e-06, "logits/chosen": -2.583311080932617, "logits/rejected": -2.1373889446258545, "logps/chosen": -218.4346160888672, "logps/rejected": -150.66162109375, "loss": 0.6923, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01714567467570305, "rewards/margins": 0.037458065897226334, "rewards/rejected": -0.020312385633587837, "step": 6360 }, { "epoch": 0.42, "learning_rate": 3.6213459086673786e-06, "logits/chosen": -2.3131918907165527, "logits/rejected": -2.375338315963745, "logps/chosen": -168.5972900390625, "logps/rejected": -186.1190185546875, "loss": 0.6881, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.02340294048190117, "rewards/margins": 0.08148431777954102, "rewards/rejected": -0.05808137729763985, "step": 6370 }, { "epoch": 0.42, "learning_rate": 3.6162396784790737e-06, "logits/chosen": -2.1972436904907227, "logits/rejected": -2.225222110748291, "logps/chosen": -221.7857666015625, "logps/rejected": -227.5126495361328, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.012212952598929405, "rewards/margins": 0.058864694088697433, "rewards/rejected": -0.04665173962712288, "step": 6380 }, { "epoch": 0.42, "learning_rate": 3.6111276254378095e-06, "logits/chosen": -2.2998032569885254, "logits/rejected": -2.295097827911377, "logps/chosen": -222.982177734375, "logps/rejected": -229.3250732421875, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04974722862243652, "rewards/margins": 0.10756466537714005, "rewards/rejected": -0.05781743675470352, "step": 6390 }, { "epoch": 0.42, "learning_rate": 3.606009776210559e-06, "logits/chosen": -2.290212392807007, "logits/rejected": -2.0554823875427246, "logps/chosen": -239.64724731445312, "logps/rejected": -205.0599365234375, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": 0.009764440357685089, "rewards/margins": 0.09071458876132965, "rewards/rejected": -0.08095015585422516, "step": 6400 }, { "epoch": 0.42, "eval_logits/chosen": -2.3204777240753174, "eval_logits/rejected": -2.1320362091064453, "eval_logps/chosen": -229.05621337890625, "eval_logps/rejected": -217.79949951171875, "eval_loss": 0.6900865435600281, "eval_rewards/accuracies": 0.6309999823570251, "eval_rewards/chosen": 0.02948746271431446, "eval_rewards/margins": 0.09136352688074112, "eval_rewards/rejected": -0.06187606602907181, "eval_runtime": 713.0231, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 6400 }, { "epoch": 0.42, "learning_rate": 3.600886157494531e-06, "logits/chosen": -2.4270999431610107, "logits/rejected": -2.302009105682373, "logps/chosen": -260.2608642578125, "logps/rejected": -257.4969787597656, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": 0.025956381112337112, "rewards/margins": 0.09018175303936005, "rewards/rejected": -0.06422537565231323, "step": 6410 }, { "epoch": 0.42, "learning_rate": 3.5957567960170304e-06, "logits/chosen": -2.5143160820007324, "logits/rejected": -1.7987537384033203, "logps/chosen": -286.97247314453125, "logps/rejected": -178.82583618164062, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04212607815861702, "rewards/margins": 0.10511051118373871, "rewards/rejected": -0.06298444420099258, "step": 6420 }, { "epoch": 0.42, "learning_rate": 3.590621718535319e-06, "logits/chosen": -2.1314139366149902, "logits/rejected": -1.941144347190857, "logps/chosen": -197.22164916992188, "logps/rejected": -206.35385131835938, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.004992819391191006, "rewards/margins": 0.100247822701931, "rewards/rejected": -0.10524062812328339, "step": 6430 }, { "epoch": 0.42, "learning_rate": 3.5854809518364775e-06, "logits/chosen": -2.3986408710479736, "logits/rejected": -2.087287425994873, "logps/chosen": -241.9314422607422, "logps/rejected": -209.21890258789062, "loss": 0.6875, "rewards/accuracies": 0.625, "rewards/chosen": 0.04215322062373161, "rewards/margins": 0.11800198256969452, "rewards/rejected": -0.07584875077009201, "step": 6440 }, { "epoch": 0.42, "learning_rate": 3.580334522737262e-06, "logits/chosen": -2.312293529510498, "logits/rejected": -2.025383472442627, "logps/chosen": -197.6604766845703, "logps/rejected": -173.70423889160156, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04283946007490158, "rewards/margins": 0.09693354368209839, "rewards/rejected": -0.05409408360719681, "step": 6450 }, { "epoch": 0.42, "learning_rate": 3.575182458083968e-06, "logits/chosen": -2.2589616775512695, "logits/rejected": -2.2055575847625732, "logps/chosen": -239.0443878173828, "logps/rejected": -221.5413818359375, "loss": 0.69, "rewards/accuracies": 0.625, "rewards/chosen": 0.017606602981686592, "rewards/margins": 0.10502012073993683, "rewards/rejected": -0.08741351217031479, "step": 6460 }, { "epoch": 0.42, "learning_rate": 3.5700247847522883e-06, "logits/chosen": -2.358262062072754, "logits/rejected": -2.2695822715759277, "logps/chosen": -197.18240356445312, "logps/rejected": -207.05166625976562, "loss": 0.6882, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.056155987083911896, "rewards/margins": 0.09887482225894928, "rewards/rejected": -0.04271883890032768, "step": 6470 }, { "epoch": 0.42, "learning_rate": 3.5648615296471743e-06, "logits/chosen": -2.153874635696411, "logits/rejected": -2.1209194660186768, "logps/chosen": -193.21182250976562, "logps/rejected": -241.32669067382812, "loss": 0.6902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04150991886854172, "rewards/margins": 0.10041022300720215, "rewards/rejected": -0.05890030413866043, "step": 6480 }, { "epoch": 0.42, "learning_rate": 3.559692719702693e-06, "logits/chosen": -2.1794726848602295, "logits/rejected": -1.8666191101074219, "logps/chosen": -283.8521728515625, "logps/rejected": -234.9342498779297, "loss": 0.6888, "rewards/accuracies": 0.625, "rewards/chosen": 0.022929811850190163, "rewards/margins": 0.07811586558818817, "rewards/rejected": -0.05518605187535286, "step": 6490 }, { "epoch": 0.43, "learning_rate": 3.55451838188189e-06, "logits/chosen": -2.293243885040283, "logits/rejected": -2.1898462772369385, "logps/chosen": -257.29473876953125, "logps/rejected": -287.2796630859375, "loss": 0.6918, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.03992018103599548, "rewards/margins": 0.08051706850528717, "rewards/rejected": -0.04059688374400139, "step": 6500 }, { "epoch": 0.43, "eval_logits/chosen": -2.3292880058288574, "eval_logits/rejected": -2.1406772136688232, "eval_logps/chosen": -229.60731506347656, "eval_logps/rejected": -217.20216369628906, "eval_loss": 0.6900708675384521, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": 0.023976394906640053, "eval_rewards/margins": 0.07987881451845169, "eval_rewards/rejected": -0.05590242147445679, "eval_runtime": 713.1525, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 6500 }, { "epoch": 0.43, "learning_rate": 3.549338543176645e-06, "logits/chosen": -2.3476295471191406, "logits/rejected": -2.080786943435669, "logps/chosen": -302.98834228515625, "logps/rejected": -277.6363220214844, "loss": 0.6911, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03500208631157875, "rewards/margins": 0.07001151889562607, "rewards/rejected": -0.035009440034627914, "step": 6510 }, { "epoch": 0.43, "learning_rate": 3.5441532306075342e-06, "logits/chosen": -2.294619083404541, "logits/rejected": -2.282924175262451, "logps/chosen": -231.687255859375, "logps/rejected": -283.9492492675781, "loss": 0.6924, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.002680377336218953, "rewards/margins": 0.056967295706272125, "rewards/rejected": -0.05964766815304756, "step": 6520 }, { "epoch": 0.43, "learning_rate": 3.5389624712236894e-06, "logits/chosen": -2.295729637145996, "logits/rejected": -2.0658695697784424, "logps/chosen": -212.0193328857422, "logps/rejected": -188.84963989257812, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": 0.020164761692285538, "rewards/margins": 0.037581443786621094, "rewards/rejected": -0.017416680231690407, "step": 6530 }, { "epoch": 0.43, "learning_rate": 3.533766292102653e-06, "logits/chosen": -2.2568297386169434, "logits/rejected": -2.117800235748291, "logps/chosen": -210.57046508789062, "logps/rejected": -203.5366973876953, "loss": 0.6898, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.007259045727550983, "rewards/margins": 0.04654930159449577, "rewards/rejected": -0.053808342665433884, "step": 6540 }, { "epoch": 0.43, "learning_rate": 3.5285647203502404e-06, "logits/chosen": -2.4623608589172363, "logits/rejected": -2.3327670097351074, "logps/chosen": -253.4337615966797, "logps/rejected": -221.22885131835938, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.038372620940208435, "rewards/margins": 0.055611032992601395, "rewards/rejected": -0.01723841205239296, "step": 6550 }, { "epoch": 0.43, "learning_rate": 3.5233577831003983e-06, "logits/chosen": -2.287559986114502, "logits/rejected": -2.138993501663208, "logps/chosen": -249.0475616455078, "logps/rejected": -233.41311645507812, "loss": 0.6888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03128386288881302, "rewards/margins": 0.08034192025661469, "rewards/rejected": -0.04905804991722107, "step": 6560 }, { "epoch": 0.43, "learning_rate": 3.5181455075150628e-06, "logits/chosen": -2.2086570262908936, "logits/rejected": -1.8272384405136108, "logps/chosen": -180.17086791992188, "logps/rejected": -139.23220825195312, "loss": 0.6917, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0177529938519001, "rewards/margins": 0.05684244632720947, "rewards/rejected": -0.03908945992588997, "step": 6570 }, { "epoch": 0.43, "learning_rate": 3.512927920784016e-06, "logits/chosen": -2.37443208694458, "logits/rejected": -2.232402801513672, "logps/chosen": -221.47036743164062, "logps/rejected": -214.24496459960938, "loss": 0.6877, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.042037345468997955, "rewards/margins": 0.1275465041399002, "rewards/rejected": -0.08550916612148285, "step": 6580 }, { "epoch": 0.43, "learning_rate": 3.5077050501247457e-06, "logits/chosen": -2.4588403701782227, "logits/rejected": -2.0066471099853516, "logps/chosen": -274.39154052734375, "logps/rejected": -217.39682006835938, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": 0.06623397767543793, "rewards/margins": 0.10393860191106796, "rewards/rejected": -0.037704624235630035, "step": 6590 }, { "epoch": 0.43, "learning_rate": 3.5024769227823042e-06, "logits/chosen": -2.353419065475464, "logits/rejected": -2.2162890434265137, "logps/chosen": -161.44163513183594, "logps/rejected": -128.65968322753906, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": 0.014296752400696278, "rewards/margins": 0.08015719056129456, "rewards/rejected": -0.06586043536663055, "step": 6600 }, { "epoch": 0.43, "eval_logits/chosen": -2.337346076965332, "eval_logits/rejected": -2.1480324268341064, "eval_logps/chosen": -228.5489959716797, "eval_logps/rejected": -215.88450622558594, "eval_loss": 0.6900503039360046, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": 0.03455958515405655, "eval_rewards/margins": 0.07728561758995056, "eval_rewards/rejected": -0.042726028710603714, "eval_runtime": 711.4362, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 6600 }, { "epoch": 0.43, "learning_rate": 3.4972435660291646e-06, "logits/chosen": -2.4253835678100586, "logits/rejected": -2.281532049179077, "logps/chosen": -240.89852905273438, "logps/rejected": -223.0607452392578, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.022457608953118324, "rewards/margins": 0.07101938128471375, "rewards/rejected": -0.04856177791953087, "step": 6610 }, { "epoch": 0.43, "learning_rate": 3.492005007165079e-06, "logits/chosen": -2.2975025177001953, "logits/rejected": -2.044618844985962, "logps/chosen": -224.947265625, "logps/rejected": -237.4292449951172, "loss": 0.6897, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.04589748755097389, "rewards/margins": 0.06325410306453705, "rewards/rejected": -0.017356622964143753, "step": 6620 }, { "epoch": 0.43, "learning_rate": 3.4867612735169377e-06, "logits/chosen": -2.4469258785247803, "logits/rejected": -2.0971310138702393, "logps/chosen": -220.57882690429688, "logps/rejected": -151.8156280517578, "loss": 0.6882, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.044555455446243286, "rewards/margins": 0.09465045481920242, "rewards/rejected": -0.05009499937295914, "step": 6630 }, { "epoch": 0.43, "learning_rate": 3.4815123924386226e-06, "logits/chosen": -2.609863758087158, "logits/rejected": -2.2980103492736816, "logps/chosen": -301.5444030761719, "logps/rejected": -238.43344116210938, "loss": 0.6907, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.04996975138783455, "rewards/margins": 0.06561549007892609, "rewards/rejected": -0.015645746141672134, "step": 6640 }, { "epoch": 0.44, "learning_rate": 3.4762583913108696e-06, "logits/chosen": -2.1854515075683594, "logits/rejected": -1.9473320245742798, "logps/chosen": -263.75238037109375, "logps/rejected": -230.9041290283203, "loss": 0.6906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.06005977839231491, "rewards/margins": 0.08066942542791367, "rewards/rejected": -0.020609647035598755, "step": 6650 }, { "epoch": 0.44, "learning_rate": 3.4709992975411217e-06, "logits/chosen": -2.3034565448760986, "logits/rejected": -1.8144235610961914, "logps/chosen": -250.67794799804688, "logps/rejected": -206.841796875, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04281472787261009, "rewards/margins": 0.08430864661931992, "rewards/rejected": -0.04149392247200012, "step": 6660 }, { "epoch": 0.44, "learning_rate": 3.4657351385633886e-06, "logits/chosen": -2.424379825592041, "logits/rejected": -2.0678696632385254, "logps/chosen": -180.195068359375, "logps/rejected": -179.3686065673828, "loss": 0.6858, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.05095613747835159, "rewards/margins": 0.11373704671859741, "rewards/rejected": -0.06278089433908463, "step": 6670 }, { "epoch": 0.44, "learning_rate": 3.4604659418381024e-06, "logits/chosen": -2.3288955688476562, "logits/rejected": -1.9227346181869507, "logps/chosen": -218.0121612548828, "logps/rejected": -198.50668334960938, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0004214264336042106, "rewards/margins": 0.10025143623352051, "rewards/rejected": -0.09983000159263611, "step": 6680 }, { "epoch": 0.44, "learning_rate": 3.4551917348519744e-06, "logits/chosen": -2.4326155185699463, "logits/rejected": -2.1617472171783447, "logps/chosen": -278.9857482910156, "logps/rejected": -239.697021484375, "loss": 0.6894, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.025129491463303566, "rewards/margins": 0.07911841571331024, "rewards/rejected": -0.053988922387361526, "step": 6690 }, { "epoch": 0.44, "learning_rate": 3.4499125451178505e-06, "logits/chosen": -2.010340929031372, "logits/rejected": -2.0988070964813232, "logps/chosen": -204.6190643310547, "logps/rejected": -235.9315948486328, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.003277752548456192, "rewards/margins": 0.06699816882610321, "rewards/rejected": -0.0702759176492691, "step": 6700 }, { "epoch": 0.44, "eval_logits/chosen": -2.3288984298706055, "eval_logits/rejected": -2.1398680210113525, "eval_logps/chosen": -231.943115234375, "eval_logps/rejected": -220.5701446533203, "eval_loss": 0.6900596022605896, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": 0.0006183562218211591, "eval_rewards/margins": 0.09020071476697922, "eval_rewards/rejected": -0.08958234637975693, "eval_runtime": 710.1788, "eval_samples_per_second": 2.816, "eval_steps_per_second": 1.408, "step": 6700 }, { "epoch": 0.44, "learning_rate": 3.4446284001745723e-06, "logits/chosen": -2.14957857131958, "logits/rejected": -1.8969390392303467, "logps/chosen": -210.86453247070312, "logps/rejected": -234.8885955810547, "loss": 0.6906, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.046308983117341995, "rewards/margins": 0.07277072966098785, "rewards/rejected": -0.11907969415187836, "step": 6710 }, { "epoch": 0.44, "learning_rate": 3.439339327586827e-06, "logits/chosen": -2.2965493202209473, "logits/rejected": -2.2393410205841064, "logps/chosen": -188.0366973876953, "logps/rejected": -191.59263610839844, "loss": 0.6884, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.019810201600193977, "rewards/margins": 0.08919601142406464, "rewards/rejected": -0.06938581168651581, "step": 6720 }, { "epoch": 0.44, "learning_rate": 3.434045354945008e-06, "logits/chosen": -2.3867483139038086, "logits/rejected": -2.1257870197296143, "logps/chosen": -275.6525573730469, "logps/rejected": -277.5914611816406, "loss": 0.6907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0162797961384058, "rewards/margins": 0.06674771010875702, "rewards/rejected": -0.08302750438451767, "step": 6730 }, { "epoch": 0.44, "learning_rate": 3.4287465098650713e-06, "logits/chosen": -2.38753604888916, "logits/rejected": -2.387018918991089, "logps/chosen": -249.96890258789062, "logps/rejected": -239.8872833251953, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": -0.01993386633694172, "rewards/margins": 0.04759936034679413, "rewards/rejected": -0.0675332322716713, "step": 6740 }, { "epoch": 0.44, "learning_rate": 3.423442819988387e-06, "logits/chosen": -2.2356374263763428, "logits/rejected": -2.1222212314605713, "logps/chosen": -190.93006896972656, "logps/rejected": -186.69676208496094, "loss": 0.7044, "rewards/accuracies": 0.625, "rewards/chosen": -0.07542804628610611, "rewards/margins": 0.029561137780547142, "rewards/rejected": -0.1049891859292984, "step": 6750 }, { "epoch": 0.44, "learning_rate": 3.4181343129816e-06, "logits/chosen": -2.4288418292999268, "logits/rejected": -2.120914936065674, "logps/chosen": -175.26609802246094, "logps/rejected": -167.46408081054688, "loss": 0.6891, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.010955859906971455, "rewards/margins": 0.06488125026226044, "rewards/rejected": -0.053925395011901855, "step": 6760 }, { "epoch": 0.44, "learning_rate": 3.4128210165364837e-06, "logits/chosen": -2.168002128601074, "logits/rejected": -2.1744346618652344, "logps/chosen": -187.72097778320312, "logps/rejected": -209.94015502929688, "loss": 0.6874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.03688240796327591, "rewards/margins": 0.13695737719535828, "rewards/rejected": -0.10007498413324356, "step": 6770 }, { "epoch": 0.44, "learning_rate": 3.407502958369795e-06, "logits/chosen": -2.279935836791992, "logits/rejected": -2.1321768760681152, "logps/chosen": -240.8074951171875, "logps/rejected": -220.3392333984375, "loss": 0.6873, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.016402950510382652, "rewards/margins": 0.08744045346975327, "rewards/rejected": -0.07103750109672546, "step": 6780 }, { "epoch": 0.44, "learning_rate": 3.4021801662231297e-06, "logits/chosen": -2.3217625617980957, "logits/rejected": -2.168172836303711, "logps/chosen": -249.3394317626953, "logps/rejected": -232.941650390625, "loss": 0.692, "rewards/accuracies": 0.625, "rewards/chosen": -0.004626649431884289, "rewards/margins": 0.05877614766359329, "rewards/rejected": -0.06340280920267105, "step": 6790 }, { "epoch": 0.44, "learning_rate": 3.3968526678627793e-06, "logits/chosen": -2.2708561420440674, "logits/rejected": -1.9311338663101196, "logps/chosen": -265.8199462890625, "logps/rejected": -227.452392578125, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": -0.007894573733210564, "rewards/margins": 0.0573604479432106, "rewards/rejected": -0.06525502353906631, "step": 6800 }, { "epoch": 0.44, "eval_logits/chosen": -2.326197862625122, "eval_logits/rejected": -2.137258529663086, "eval_logps/chosen": -233.22720336914062, "eval_logps/rejected": -221.09767150878906, "eval_loss": 0.6899824142456055, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.012222343124449253, "eval_rewards/margins": 0.08263525366783142, "eval_rewards/rejected": -0.0948575884103775, "eval_runtime": 709.2866, "eval_samples_per_second": 2.82, "eval_steps_per_second": 1.41, "step": 6800 }, { "epoch": 0.45, "learning_rate": 3.391520491079586e-06, "logits/chosen": -2.4146409034729004, "logits/rejected": -2.4289791584014893, "logps/chosen": -196.1842498779297, "logps/rejected": -171.66029357910156, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": -0.0022164147812873125, "rewards/margins": 0.06005682423710823, "rewards/rejected": -0.062273234128952026, "step": 6810 }, { "epoch": 0.45, "learning_rate": 3.3861836636887936e-06, "logits/chosen": -2.3121285438537598, "logits/rejected": -2.129061222076416, "logps/chosen": -267.5245361328125, "logps/rejected": -231.2607879638672, "loss": 0.6885, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.007056675851345062, "rewards/margins": 0.10214730352163315, "rewards/rejected": -0.1092039942741394, "step": 6820 }, { "epoch": 0.45, "learning_rate": 3.3808422135299106e-06, "logits/chosen": -2.2894935607910156, "logits/rejected": -2.3739147186279297, "logps/chosen": -286.0987243652344, "logps/rejected": -346.9412536621094, "loss": 0.691, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.004209049511700869, "rewards/margins": 0.07767010480165482, "rewards/rejected": -0.0818791538476944, "step": 6830 }, { "epoch": 0.45, "learning_rate": 3.375496168466556e-06, "logits/chosen": -2.468285083770752, "logits/rejected": -2.0140810012817383, "logps/chosen": -214.4819793701172, "logps/rejected": -164.49362182617188, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": 0.004561436362564564, "rewards/margins": 0.05938258767127991, "rewards/rejected": -0.05482115224003792, "step": 6840 }, { "epoch": 0.45, "learning_rate": 3.3701455563863205e-06, "logits/chosen": -2.5253567695617676, "logits/rejected": -2.0757806301116943, "logps/chosen": -294.3965148925781, "logps/rejected": -268.2409362792969, "loss": 0.6867, "rewards/accuracies": 0.625, "rewards/chosen": -0.025218481197953224, "rewards/margins": 0.1016344428062439, "rewards/rejected": -0.12685291469097137, "step": 6850 }, { "epoch": 0.45, "learning_rate": 3.3647904052006174e-06, "logits/chosen": -2.3288276195526123, "logits/rejected": -2.264437675476074, "logps/chosen": -267.79376220703125, "logps/rejected": -278.85662841796875, "loss": 0.6901, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.024954695254564285, "rewards/margins": 0.08056318014860153, "rewards/rejected": -0.10551787912845612, "step": 6860 }, { "epoch": 0.45, "learning_rate": 3.3594307428445383e-06, "logits/chosen": -2.529595375061035, "logits/rejected": -2.1268649101257324, "logps/chosen": -333.0338439941406, "logps/rejected": -309.6853942871094, "loss": 0.691, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0021690779831260443, "rewards/margins": 0.08068062365055084, "rewards/rejected": -0.0785115510225296, "step": 6870 }, { "epoch": 0.45, "learning_rate": 3.354066597276707e-06, "logits/chosen": -2.184455394744873, "logits/rejected": -2.18107533454895, "logps/chosen": -224.90921020507812, "logps/rejected": -266.7405700683594, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.013449070043861866, "rewards/margins": 0.0647086501121521, "rewards/rejected": -0.07815771549940109, "step": 6880 }, { "epoch": 0.45, "learning_rate": 3.348697996479136e-06, "logits/chosen": -2.405324935913086, "logits/rejected": -2.148305654525757, "logps/chosen": -228.61373901367188, "logps/rejected": -182.25930786132812, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.008047891780734062, "rewards/margins": 0.07117293030023575, "rewards/rejected": -0.07922081649303436, "step": 6890 }, { "epoch": 0.45, "learning_rate": 3.3433249684570757e-06, "logits/chosen": -2.224512815475464, "logits/rejected": -2.0861592292785645, "logps/chosen": -178.4225616455078, "logps/rejected": -142.22152709960938, "loss": 0.6881, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.00593004934489727, "rewards/margins": 0.09517361223697662, "rewards/rejected": -0.10110366344451904, "step": 6900 }, { "epoch": 0.45, "eval_logits/chosen": -2.3220698833465576, "eval_logits/rejected": -2.133631944656372, "eval_logps/chosen": -231.9954071044922, "eval_logps/rejected": -219.68096923828125, "eval_loss": 0.6899513006210327, "eval_rewards/accuracies": 0.6309999823570251, "eval_rewards/chosen": 9.537512232782319e-05, "eval_rewards/margins": 0.08078599721193314, "eval_rewards/rejected": -0.08069062978029251, "eval_runtime": 712.9402, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 6900 }, { "epoch": 0.45, "learning_rate": 3.3379475412388724e-06, "logits/chosen": -2.405919075012207, "logits/rejected": -2.2413978576660156, "logps/chosen": -240.8171844482422, "logps/rejected": -219.07095336914062, "loss": 0.6885, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.01137818954885006, "rewards/margins": 0.11952348798513412, "rewards/rejected": -0.1081453189253807, "step": 6910 }, { "epoch": 0.45, "learning_rate": 3.3325657428758207e-06, "logits/chosen": -2.2108242511749268, "logits/rejected": -2.1874756813049316, "logps/chosen": -241.8357696533203, "logps/rejected": -253.5496063232422, "loss": 0.6872, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.018397245556116104, "rewards/margins": 0.10887129604816437, "rewards/rejected": -0.09047403931617737, "step": 6920 }, { "epoch": 0.45, "learning_rate": 3.3271796014420175e-06, "logits/chosen": -2.3145766258239746, "logits/rejected": -2.254108428955078, "logps/chosen": -214.22042846679688, "logps/rejected": -208.04830932617188, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.01770118437707424, "rewards/margins": 0.11536221206188202, "rewards/rejected": -0.133063405752182, "step": 6930 }, { "epoch": 0.45, "learning_rate": 3.3217891450342142e-06, "logits/chosen": -2.3170406818389893, "logits/rejected": -1.9900795221328735, "logps/chosen": -259.0184020996094, "logps/rejected": -200.655029296875, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.004199774004518986, "rewards/margins": 0.10629435628652573, "rewards/rejected": -0.11049413681030273, "step": 6940 }, { "epoch": 0.45, "learning_rate": 3.3163944017716733e-06, "logits/chosen": -2.4827568531036377, "logits/rejected": -2.180410623550415, "logps/chosen": -215.23617553710938, "logps/rejected": -189.19430541992188, "loss": 0.6907, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.01134683657437563, "rewards/margins": 0.07055879384279251, "rewards/rejected": -0.05921195074915886, "step": 6950 }, { "epoch": 0.46, "learning_rate": 3.310995399796017e-06, "logits/chosen": -2.421800374984741, "logits/rejected": -2.30001163482666, "logps/chosen": -274.8177185058594, "logps/rejected": -272.7926940917969, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.008314954116940498, "rewards/margins": 0.047770872712135315, "rewards/rejected": -0.05608583241701126, "step": 6960 }, { "epoch": 0.46, "learning_rate": 3.305592167271085e-06, "logits/chosen": -2.302924871444702, "logits/rejected": -2.2044272422790527, "logps/chosen": -194.39797973632812, "logps/rejected": -194.43634033203125, "loss": 0.6886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.028481673449277878, "rewards/margins": 0.10950399935245514, "rewards/rejected": -0.08102231472730637, "step": 6970 }, { "epoch": 0.46, "learning_rate": 3.3001847323827846e-06, "logits/chosen": -2.295625925064087, "logits/rejected": -2.242480516433716, "logps/chosen": -266.0675354003906, "logps/rejected": -272.8329162597656, "loss": 0.6892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.020301584154367447, "rewards/margins": 0.10184173285961151, "rewards/rejected": -0.08154015243053436, "step": 6980 }, { "epoch": 0.46, "learning_rate": 3.2947731233389447e-06, "logits/chosen": -2.3801562786102295, "logits/rejected": -2.004241943359375, "logps/chosen": -247.1204376220703, "logps/rejected": -202.22914123535156, "loss": 0.6874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0038624543230980635, "rewards/margins": 0.10536074638366699, "rewards/rejected": -0.10149829089641571, "step": 6990 }, { "epoch": 0.46, "learning_rate": 3.2893573683691706e-06, "logits/chosen": -2.195082902908325, "logits/rejected": -2.2036478519439697, "logps/chosen": -204.31845092773438, "logps/rejected": -198.5324249267578, "loss": 0.688, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.003750187810510397, "rewards/margins": 0.12109758704900742, "rewards/rejected": -0.11734740436077118, "step": 7000 }, { "epoch": 0.46, "eval_logits/chosen": -2.3213798999786377, "eval_logits/rejected": -2.132957935333252, "eval_logps/chosen": -232.3555450439453, "eval_logps/rejected": -220.5653533935547, "eval_loss": 0.689961850643158, "eval_rewards/accuracies": 0.6255000233650208, "eval_rewards/chosen": -0.00350601295940578, "eval_rewards/margins": 0.08602865040302277, "eval_rewards/rejected": -0.08953466266393661, "eval_runtime": 712.7907, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 7000 }, { "epoch": 0.46, "learning_rate": 3.2839374957246915e-06, "logits/chosen": -2.4078869819641113, "logits/rejected": -2.1226019859313965, "logps/chosen": -268.3845520019531, "logps/rejected": -181.95008850097656, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.022942641749978065, "rewards/margins": 0.051037006080150604, "rewards/rejected": -0.07397964596748352, "step": 7010 }, { "epoch": 0.46, "learning_rate": 3.2785135336782187e-06, "logits/chosen": -2.250103712081909, "logits/rejected": -2.076584577560425, "logps/chosen": -234.687744140625, "logps/rejected": -267.17193603515625, "loss": 0.6892, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.007723203394562006, "rewards/margins": 0.10171866416931152, "rewards/rejected": -0.10944187641143799, "step": 7020 }, { "epoch": 0.46, "learning_rate": 3.2730855105237952e-06, "logits/chosen": -2.4183948040008545, "logits/rejected": -2.2227070331573486, "logps/chosen": -218.42391967773438, "logps/rejected": -272.3433532714844, "loss": 0.6905, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.018525656312704086, "rewards/margins": 0.09738930314779282, "rewards/rejected": -0.07886365056037903, "step": 7030 }, { "epoch": 0.46, "learning_rate": 3.2676534545766486e-06, "logits/chosen": -2.2223055362701416, "logits/rejected": -2.1927051544189453, "logps/chosen": -211.2403106689453, "logps/rejected": -206.35842895507812, "loss": 0.691, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.012292217463254929, "rewards/margins": 0.05010954663157463, "rewards/rejected": -0.037817325443029404, "step": 7040 }, { "epoch": 0.46, "learning_rate": 3.262217394173043e-06, "logits/chosen": -2.335088014602661, "logits/rejected": -2.052690267562866, "logps/chosen": -242.60922241210938, "logps/rejected": -244.936279296875, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.017585698515176773, "rewards/margins": 0.08410107344388962, "rewards/rejected": -0.10168677568435669, "step": 7050 }, { "epoch": 0.46, "learning_rate": 3.2567773576701333e-06, "logits/chosen": -2.157353162765503, "logits/rejected": -1.9850852489471436, "logps/chosen": -257.1738586425781, "logps/rejected": -238.9078826904297, "loss": 0.6863, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.014163834974169731, "rewards/margins": 0.1204962283372879, "rewards/rejected": -0.1346600502729416, "step": 7060 }, { "epoch": 0.46, "learning_rate": 3.2513333734458154e-06, "logits/chosen": -2.3726444244384766, "logits/rejected": -2.276071310043335, "logps/chosen": -207.281982421875, "logps/rejected": -197.08543395996094, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.004406812135130167, "rewards/margins": 0.06411116570234299, "rewards/rejected": -0.06851796805858612, "step": 7070 }, { "epoch": 0.46, "learning_rate": 3.245885469898576e-06, "logits/chosen": -2.2665371894836426, "logits/rejected": -2.051095962524414, "logps/chosen": -300.17071533203125, "logps/rejected": -246.3044891357422, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.003844373393803835, "rewards/margins": 0.09821848571300507, "rewards/rejected": -0.09437411278486252, "step": 7080 }, { "epoch": 0.46, "learning_rate": 3.2404336754473497e-06, "logits/chosen": -2.263822317123413, "logits/rejected": -2.0003104209899902, "logps/chosen": -265.4076232910156, "logps/rejected": -207.5909423828125, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": 0.020993733778595924, "rewards/margins": 0.069535993039608, "rewards/rejected": -0.04854225367307663, "step": 7090 }, { "epoch": 0.46, "learning_rate": 3.234978018531367e-06, "logits/chosen": -2.587207555770874, "logits/rejected": -2.156212329864502, "logps/chosen": -256.6408996582031, "logps/rejected": -198.82559204101562, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.018593590706586838, "rewards/margins": 0.06984353065490723, "rewards/rejected": -0.05124994367361069, "step": 7100 }, { "epoch": 0.46, "eval_logits/chosen": -2.31289005279541, "eval_logits/rejected": -2.125515937805176, "eval_logps/chosen": -231.62696838378906, "eval_logps/rejected": -219.47421264648438, "eval_loss": 0.689959704875946, "eval_rewards/accuracies": 0.6309999823570251, "eval_rewards/chosen": 0.0037797146942466497, "eval_rewards/margins": 0.08240301162004471, "eval_rewards/rejected": -0.07862330228090286, "eval_runtime": 712.5102, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 7100 }, { "epoch": 0.47, "learning_rate": 3.229518527610006e-06, "logits/chosen": -2.405752182006836, "logits/rejected": -2.1117520332336426, "logps/chosen": -291.0060119628906, "logps/rejected": -252.95669555664062, "loss": 0.691, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.017840737476944923, "rewards/margins": 0.07126693427562714, "rewards/rejected": -0.05342619866132736, "step": 7110 }, { "epoch": 0.47, "learning_rate": 3.2240552311626465e-06, "logits/chosen": -2.3919520378112793, "logits/rejected": -2.1560964584350586, "logps/chosen": -239.92935180664062, "logps/rejected": -231.94467163085938, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": 0.012844247743487358, "rewards/margins": 0.06960373371839523, "rewards/rejected": -0.056759487837553024, "step": 7120 }, { "epoch": 0.47, "learning_rate": 3.2185881576885193e-06, "logits/chosen": -2.403320550918579, "logits/rejected": -2.052623748779297, "logps/chosen": -211.9319305419922, "logps/rejected": -180.37948608398438, "loss": 0.6908, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0020910645835101604, "rewards/margins": 0.07115035504102707, "rewards/rejected": -0.07324142754077911, "step": 7130 }, { "epoch": 0.47, "learning_rate": 3.213117335706557e-06, "logits/chosen": -2.279644727706909, "logits/rejected": -2.392721176147461, "logps/chosen": -256.3946838378906, "logps/rejected": -271.6476135253906, "loss": 0.6922, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0034970403648912907, "rewards/margins": 0.06046708673238754, "rewards/rejected": -0.0639641284942627, "step": 7140 }, { "epoch": 0.47, "learning_rate": 3.2076427937552473e-06, "logits/chosen": -2.3162059783935547, "logits/rejected": -2.0917744636535645, "logps/chosen": -244.4017791748047, "logps/rejected": -236.8706512451172, "loss": 0.6869, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.010880010202527046, "rewards/margins": 0.11592531204223633, "rewards/rejected": -0.10504531860351562, "step": 7150 }, { "epoch": 0.47, "learning_rate": 3.2021645603924827e-06, "logits/chosen": -2.1680960655212402, "logits/rejected": -2.0739083290100098, "logps/chosen": -137.6443634033203, "logps/rejected": -159.3563995361328, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": 0.008097528479993343, "rewards/margins": 0.12347575277090073, "rewards/rejected": -0.11537822335958481, "step": 7160 }, { "epoch": 0.47, "learning_rate": 3.196682664195412e-06, "logits/chosen": -2.2917988300323486, "logits/rejected": -2.0159618854522705, "logps/chosen": -205.24533081054688, "logps/rejected": -175.20578002929688, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.011971285566687584, "rewards/margins": 0.048515014350414276, "rewards/rejected": -0.06048629805445671, "step": 7170 }, { "epoch": 0.47, "learning_rate": 3.191197133760291e-06, "logits/chosen": -2.5555553436279297, "logits/rejected": -2.229135751724243, "logps/chosen": -262.1866760253906, "logps/rejected": -200.24937438964844, "loss": 0.6871, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.052167803049087524, "rewards/margins": 0.1231146901845932, "rewards/rejected": -0.07094688713550568, "step": 7180 }, { "epoch": 0.47, "learning_rate": 3.185707997702334e-06, "logits/chosen": -2.212904453277588, "logits/rejected": -2.0473551750183105, "logps/chosen": -240.8466033935547, "logps/rejected": -211.2018585205078, "loss": 0.6891, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0008177075651474297, "rewards/margins": 0.08206796646118164, "rewards/rejected": -0.08288567513227463, "step": 7190 }, { "epoch": 0.47, "learning_rate": 3.1802152846555624e-06, "logits/chosen": -2.245023727416992, "logits/rejected": -2.220525026321411, "logps/chosen": -222.89584350585938, "logps/rejected": -215.0545654296875, "loss": 0.6888, "rewards/accuracies": 0.625, "rewards/chosen": 0.013471787795424461, "rewards/margins": 0.07678040862083435, "rewards/rejected": -0.06330861151218414, "step": 7200 }, { "epoch": 0.47, "eval_logits/chosen": -2.3261678218841553, "eval_logits/rejected": -2.1375534534454346, "eval_logps/chosen": -230.54725646972656, "eval_logps/rejected": -217.60206604003906, "eval_loss": 0.6899964213371277, "eval_rewards/accuracies": 0.621999979019165, "eval_rewards/chosen": 0.014577223919332027, "eval_rewards/margins": 0.07447873055934906, "eval_rewards/rejected": -0.059901509433984756, "eval_runtime": 713.9129, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 7200 }, { "epoch": 0.47, "learning_rate": 3.174719023272659e-06, "logits/chosen": -2.4018406867980957, "logits/rejected": -2.4472343921661377, "logps/chosen": -212.6780242919922, "logps/rejected": -267.15069580078125, "loss": 0.6882, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.010161024518311024, "rewards/margins": 0.08123484998941422, "rewards/rejected": -0.07107381522655487, "step": 7210 }, { "epoch": 0.47, "learning_rate": 3.169219242224816e-06, "logits/chosen": -2.3529484272003174, "logits/rejected": -2.1854114532470703, "logps/chosen": -240.903564453125, "logps/rejected": -241.4931640625, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.00976332277059555, "rewards/margins": 0.06684872508049011, "rewards/rejected": -0.07661206275224686, "step": 7220 }, { "epoch": 0.47, "learning_rate": 3.1637159702015837e-06, "logits/chosen": -2.3580760955810547, "logits/rejected": -2.0424869060516357, "logps/chosen": -205.4845428466797, "logps/rejected": -197.5430450439453, "loss": 0.6888, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.004075945820659399, "rewards/margins": 0.11381276696920395, "rewards/rejected": -0.10973681509494781, "step": 7230 }, { "epoch": 0.47, "learning_rate": 3.1582092359107263e-06, "logits/chosen": -2.4041996002197266, "logits/rejected": -2.184713840484619, "logps/chosen": -278.28271484375, "logps/rejected": -241.3439483642578, "loss": 0.6935, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -4.9874186515808105e-05, "rewards/margins": 0.08891940116882324, "rewards/rejected": -0.08896928280591965, "step": 7240 }, { "epoch": 0.47, "learning_rate": 3.152699068078067e-06, "logits/chosen": -2.2145161628723145, "logits/rejected": -2.00947904586792, "logps/chosen": -284.87933349609375, "logps/rejected": -276.2130432128906, "loss": 0.6879, "rewards/accuracies": 0.625, "rewards/chosen": -0.045890286564826965, "rewards/margins": 0.11874841153621674, "rewards/rejected": -0.1646386981010437, "step": 7250 }, { "epoch": 0.48, "learning_rate": 3.1471854954473415e-06, "logits/chosen": -2.354721784591675, "logits/rejected": -2.3982090950012207, "logps/chosen": -248.9252471923828, "logps/rejected": -255.275634765625, "loss": 0.6894, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.018414465710520744, "rewards/margins": 0.10118373483419418, "rewards/rejected": -0.08276927471160889, "step": 7260 }, { "epoch": 0.48, "learning_rate": 3.1416685467800436e-06, "logits/chosen": -2.1534171104431152, "logits/rejected": -2.116290330886841, "logps/chosen": -188.9571990966797, "logps/rejected": -176.0358428955078, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03374389559030533, "rewards/margins": 0.09608308225870132, "rewards/rejected": -0.12982699275016785, "step": 7270 }, { "epoch": 0.48, "learning_rate": 3.1361482508552803e-06, "logits/chosen": -2.3551740646362305, "logits/rejected": -1.8543596267700195, "logps/chosen": -242.98074340820312, "logps/rejected": -206.5134735107422, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.010297993198037148, "rewards/margins": 0.06919042766094208, "rewards/rejected": -0.07948841899633408, "step": 7280 }, { "epoch": 0.48, "learning_rate": 3.1306246364696198e-06, "logits/chosen": -2.481549024581909, "logits/rejected": -2.280365228652954, "logps/chosen": -251.1072235107422, "logps/rejected": -242.42062377929688, "loss": 0.6903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.013671429827809334, "rewards/margins": 0.08818572014570236, "rewards/rejected": -0.07451429218053818, "step": 7290 }, { "epoch": 0.48, "learning_rate": 3.1250977324369413e-06, "logits/chosen": -2.2711830139160156, "logits/rejected": -2.1912612915039062, "logps/chosen": -157.5973663330078, "logps/rejected": -173.55458068847656, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.013550999574363232, "rewards/margins": 0.09245215356349945, "rewards/rejected": -0.07890114188194275, "step": 7300 }, { "epoch": 0.48, "eval_logits/chosen": -2.314774990081787, "eval_logits/rejected": -2.127025604248047, "eval_logps/chosen": -232.7455596923828, "eval_logps/rejected": -220.20619201660156, "eval_loss": 0.6899450421333313, "eval_rewards/accuracies": 0.6290000081062317, "eval_rewards/chosen": -0.007406196556985378, "eval_rewards/margins": 0.07853667438030243, "eval_rewards/rejected": -0.08594285696744919, "eval_runtime": 714.0048, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 7300 }, { "epoch": 0.48, "learning_rate": 3.1195675675882825e-06, "logits/chosen": -2.2243969440460205, "logits/rejected": -2.1143479347229004, "logps/chosen": -238.1915740966797, "logps/rejected": -213.28317260742188, "loss": 0.6909, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.030005771666765213, "rewards/margins": 0.05696401745080948, "rewards/rejected": -0.0869697779417038, "step": 7310 }, { "epoch": 0.48, "learning_rate": 3.1140341707716926e-06, "logits/chosen": -2.176473617553711, "logits/rejected": -1.929321527481079, "logps/chosen": -196.3441162109375, "logps/rejected": -165.09817504882812, "loss": 0.6866, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.005364052020013332, "rewards/margins": 0.11770284175872803, "rewards/rejected": -0.12306687980890274, "step": 7320 }, { "epoch": 0.48, "learning_rate": 3.1084975708520803e-06, "logits/chosen": -2.4346864223480225, "logits/rejected": -2.034972906112671, "logps/chosen": -260.06732177734375, "logps/rejected": -198.18801879882812, "loss": 0.6909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.016565248370170593, "rewards/margins": 0.08867197483778, "rewards/rejected": -0.0721067264676094, "step": 7330 }, { "epoch": 0.48, "learning_rate": 3.1029577967110625e-06, "logits/chosen": -2.4259464740753174, "logits/rejected": -2.2102203369140625, "logps/chosen": -216.46865844726562, "logps/rejected": -171.20114135742188, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0007433153805322945, "rewards/margins": 0.03790837153792381, "rewards/rejected": -0.038651686161756516, "step": 7340 }, { "epoch": 0.48, "learning_rate": 3.097414877246814e-06, "logits/chosen": -2.2673535346984863, "logits/rejected": -2.0183987617492676, "logps/chosen": -197.57110595703125, "logps/rejected": -177.40805053710938, "loss": 0.6862, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.010723413899540901, "rewards/margins": 0.09349813312292099, "rewards/rejected": -0.08277471363544464, "step": 7350 }, { "epoch": 0.48, "learning_rate": 3.0918688413739197e-06, "logits/chosen": -2.3423869609832764, "logits/rejected": -2.030740261077881, "logps/chosen": -229.8058319091797, "logps/rejected": -176.20933532714844, "loss": 0.6871, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.028771137818694115, "rewards/margins": 0.10676223039627075, "rewards/rejected": -0.07799109816551208, "step": 7360 }, { "epoch": 0.48, "learning_rate": 3.0863197180232178e-06, "logits/chosen": -2.3902642726898193, "logits/rejected": -2.0197548866271973, "logps/chosen": -197.1629638671875, "logps/rejected": -192.62374877929688, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.025812137871980667, "rewards/margins": 0.07681456953287125, "rewards/rejected": -0.051002420485019684, "step": 7370 }, { "epoch": 0.48, "learning_rate": 3.0807675361416554e-06, "logits/chosen": -2.2906887531280518, "logits/rejected": -2.047722339630127, "logps/chosen": -186.08908081054688, "logps/rejected": -114.9020004272461, "loss": 0.6903, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.02791173756122589, "rewards/margins": 0.0784730315208435, "rewards/rejected": -0.050561290234327316, "step": 7380 }, { "epoch": 0.48, "learning_rate": 3.0752123246921327e-06, "logits/chosen": -2.4102749824523926, "logits/rejected": -2.1352226734161377, "logps/chosen": -277.206298828125, "logps/rejected": -218.3984832763672, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.007069566752761602, "rewards/margins": 0.08123649656772614, "rewards/rejected": -0.0741669163107872, "step": 7390 }, { "epoch": 0.48, "learning_rate": 3.069654112653353e-06, "logits/chosen": -2.4256751537323, "logits/rejected": -2.2184150218963623, "logps/chosen": -216.2940216064453, "logps/rejected": -188.50967407226562, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.0019019130850210786, "rewards/margins": 0.04767733812332153, "rewards/rejected": -0.04577542468905449, "step": 7400 }, { "epoch": 0.48, "eval_logits/chosen": -2.311318874359131, "eval_logits/rejected": -2.123793601989746, "eval_logps/chosen": -231.12094116210938, "eval_logps/rejected": -218.42494201660156, "eval_loss": 0.6899686455726624, "eval_rewards/accuracies": 0.6284999847412109, "eval_rewards/chosen": 0.008840080350637436, "eval_rewards/margins": 0.07697049528360367, "eval_rewards/rejected": -0.06813041865825653, "eval_runtime": 713.3758, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 7400 }, { "epoch": 0.48, "learning_rate": 3.064092929019673e-06, "logits/chosen": -2.3009819984436035, "logits/rejected": -2.3356680870056152, "logps/chosen": -256.2542419433594, "logps/rejected": -281.6364440917969, "loss": 0.6916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.012892307713627815, "rewards/margins": 0.05507419630885124, "rewards/rejected": -0.04218188300728798, "step": 7410 }, { "epoch": 0.49, "learning_rate": 3.058528802800952e-06, "logits/chosen": -2.342904567718506, "logits/rejected": -2.102327823638916, "logps/chosen": -290.8139953613281, "logps/rejected": -261.8834228515625, "loss": 0.691, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01819724775850773, "rewards/margins": 0.07642240822315216, "rewards/rejected": -0.05822516605257988, "step": 7420 }, { "epoch": 0.49, "learning_rate": 3.052961763022397e-06, "logits/chosen": -2.481123685836792, "logits/rejected": -2.149416208267212, "logps/chosen": -183.3396453857422, "logps/rejected": -155.44528198242188, "loss": 0.6881, "rewards/accuracies": 0.625, "rewards/chosen": -0.004584819078445435, "rewards/margins": 0.10829279571771622, "rewards/rejected": -0.11287760734558105, "step": 7430 }, { "epoch": 0.49, "learning_rate": 3.047391838724415e-06, "logits/chosen": -2.444658041000366, "logits/rejected": -1.982791543006897, "logps/chosen": -232.76083374023438, "logps/rejected": -227.51760864257812, "loss": 0.6888, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02757873572409153, "rewards/margins": 0.10823357105255127, "rewards/rejected": -0.08065483719110489, "step": 7440 }, { "epoch": 0.49, "learning_rate": 3.0418190589624587e-06, "logits/chosen": -2.3566372394561768, "logits/rejected": -2.09330153465271, "logps/chosen": -178.53126525878906, "logps/rejected": -192.35299682617188, "loss": 0.6925, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.009091891348361969, "rewards/margins": 0.05469350144267082, "rewards/rejected": -0.06378540396690369, "step": 7450 }, { "epoch": 0.49, "learning_rate": 3.0362434528068784e-06, "logits/chosen": -2.3358893394470215, "logits/rejected": -1.9141845703125, "logps/chosen": -268.863037109375, "logps/rejected": -194.3677978515625, "loss": 0.688, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004601252265274525, "rewards/margins": 0.10998895019292831, "rewards/rejected": -0.10538768768310547, "step": 7460 }, { "epoch": 0.49, "learning_rate": 3.0306650493427657e-06, "logits/chosen": -2.2316243648529053, "logits/rejected": -2.127760410308838, "logps/chosen": -230.8894500732422, "logps/rejected": -230.5992889404297, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": 0.007297568954527378, "rewards/margins": 0.08671748638153076, "rewards/rejected": -0.07941991835832596, "step": 7470 }, { "epoch": 0.49, "learning_rate": 3.0250838776698077e-06, "logits/chosen": -2.096904754638672, "logits/rejected": -2.1422505378723145, "logps/chosen": -184.96865844726562, "logps/rejected": -193.18240356445312, "loss": 0.6862, "rewards/accuracies": 0.625, "rewards/chosen": -0.010493558831512928, "rewards/margins": 0.10960110276937485, "rewards/rejected": -0.1200946569442749, "step": 7480 }, { "epoch": 0.49, "learning_rate": 3.0194999669021275e-06, "logits/chosen": -2.098390579223633, "logits/rejected": -1.7727285623550415, "logps/chosen": -226.0522003173828, "logps/rejected": -189.67776489257812, "loss": 0.6894, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.008162322454154491, "rewards/margins": 0.10677523910999298, "rewards/rejected": -0.09861291944980621, "step": 7490 }, { "epoch": 0.49, "learning_rate": 3.0139133461681403e-06, "logits/chosen": -2.243513822555542, "logits/rejected": -2.0963521003723145, "logps/chosen": -263.7023010253906, "logps/rejected": -215.9556121826172, "loss": 0.6895, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.006919173989444971, "rewards/margins": 0.08393418788909912, "rewards/rejected": -0.07701500505208969, "step": 7500 }, { "epoch": 0.49, "eval_logits/chosen": -2.286125898361206, "eval_logits/rejected": -2.1006903648376465, "eval_logps/chosen": -231.99969482421875, "eval_logps/rejected": -219.49575805664062, "eval_loss": 0.6899300813674927, "eval_rewards/accuracies": 0.628000020980835, "eval_rewards/chosen": 5.265325307846069e-05, "eval_rewards/margins": 0.07889124006032944, "eval_rewards/rejected": -0.07883859425783157, "eval_runtime": 711.1311, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 7500 }, { "epoch": 0.49, "learning_rate": 3.0083240446103965e-06, "logits/chosen": -2.0148041248321533, "logits/rejected": -1.978687047958374, "logps/chosen": -184.3916015625, "logps/rejected": -200.95066833496094, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0035699151922017336, "rewards/margins": 0.08889930695295334, "rewards/rejected": -0.0924692153930664, "step": 7510 }, { "epoch": 0.49, "learning_rate": 3.0027320913854306e-06, "logits/chosen": -2.4875292778015137, "logits/rejected": -2.200932025909424, "logps/chosen": -291.66192626953125, "logps/rejected": -237.73507690429688, "loss": 0.6914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.007996944710612297, "rewards/margins": 0.08782526105642319, "rewards/rejected": -0.07982831448316574, "step": 7520 }, { "epoch": 0.49, "learning_rate": 2.997137515663609e-06, "logits/chosen": -2.2359402179718018, "logits/rejected": -2.1508307456970215, "logps/chosen": -223.72048950195312, "logps/rejected": -195.07565307617188, "loss": 0.6891, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01656939461827278, "rewards/margins": 0.09054360538721085, "rewards/rejected": -0.07397421449422836, "step": 7530 }, { "epoch": 0.49, "learning_rate": 2.991540346628981e-06, "logits/chosen": -2.342252492904663, "logits/rejected": -2.15889573097229, "logps/chosen": -238.47000122070312, "logps/rejected": -218.312744140625, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": 0.011303565464913845, "rewards/margins": 0.05950998514890671, "rewards/rejected": -0.048206426203250885, "step": 7540 }, { "epoch": 0.49, "learning_rate": 2.985940613479121e-06, "logits/chosen": -2.4330556392669678, "logits/rejected": -2.323356866836548, "logps/chosen": -292.80767822265625, "logps/rejected": -241.87033081054688, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.010419869795441628, "rewards/margins": 0.08434535562992096, "rewards/rejected": -0.07392548024654388, "step": 7550 }, { "epoch": 0.49, "learning_rate": 2.980338345424981e-06, "logits/chosen": -2.2963860034942627, "logits/rejected": -1.9866485595703125, "logps/chosen": -248.8218536376953, "logps/rejected": -206.16690063476562, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": 0.003979469649493694, "rewards/margins": 0.05621809884905815, "rewards/rejected": -0.05223863199353218, "step": 7560 }, { "epoch": 0.5, "learning_rate": 2.974733571690735e-06, "logits/chosen": -2.3758111000061035, "logits/rejected": -2.0978314876556396, "logps/chosen": -238.9188232421875, "logps/rejected": -186.61029052734375, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.015120044350624084, "rewards/margins": 0.0698397159576416, "rewards/rejected": -0.08495976030826569, "step": 7570 }, { "epoch": 0.5, "learning_rate": 2.9691263215136274e-06, "logits/chosen": -2.339653730392456, "logits/rejected": -2.3221707344055176, "logps/chosen": -263.97918701171875, "logps/rejected": -240.21945190429688, "loss": 0.6909, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03021194040775299, "rewards/margins": 0.07894815504550934, "rewards/rejected": -0.048736222088336945, "step": 7580 }, { "epoch": 0.5, "learning_rate": 2.963516624143823e-06, "logits/chosen": -2.230799436569214, "logits/rejected": -2.1142804622650146, "logps/chosen": -213.874755859375, "logps/rejected": -185.1417694091797, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.013080480508506298, "rewards/margins": 0.0879780501127243, "rewards/rejected": -0.10105852037668228, "step": 7590 }, { "epoch": 0.5, "learning_rate": 2.9579045088442504e-06, "logits/chosen": -2.1219449043273926, "logits/rejected": -2.1596245765686035, "logps/chosen": -189.5128631591797, "logps/rejected": -221.0270538330078, "loss": 0.6874, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.010078911669552326, "rewards/margins": 0.11765221506357193, "rewards/rejected": -0.12773114442825317, "step": 7600 }, { "epoch": 0.5, "eval_logits/chosen": -2.288795232772827, "eval_logits/rejected": -2.1032533645629883, "eval_logps/chosen": -232.4485321044922, "eval_logps/rejected": -220.70327758789062, "eval_loss": 0.6899698376655579, "eval_rewards/accuracies": 0.6299999952316284, "eval_rewards/chosen": -0.004435794893652201, "eval_rewards/margins": 0.08647802472114563, "eval_rewards/rejected": -0.0909138172864914, "eval_runtime": 714.3681, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 7600 }, { "epoch": 0.5, "learning_rate": 2.9522900048904534e-06, "logits/chosen": -2.2064361572265625, "logits/rejected": -2.1144938468933105, "logps/chosen": -244.34390258789062, "logps/rejected": -218.4773406982422, "loss": 0.693, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.046057600528001785, "rewards/margins": 0.041549500077962875, "rewards/rejected": -0.08760710060596466, "step": 7610 }, { "epoch": 0.5, "learning_rate": 2.9466731415704343e-06, "logits/chosen": -2.2881698608398438, "logits/rejected": -2.161687135696411, "logps/chosen": -225.63803100585938, "logps/rejected": -229.7403564453125, "loss": 0.6919, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.018513670191168785, "rewards/margins": 0.0846969336271286, "rewards/rejected": -0.10321060568094254, "step": 7620 }, { "epoch": 0.5, "learning_rate": 2.941053948184503e-06, "logits/chosen": -2.339186668395996, "logits/rejected": -2.1879472732543945, "logps/chosen": -279.9281921386719, "logps/rejected": -249.49502563476562, "loss": 0.6916, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00010864362411666662, "rewards/margins": 0.04479020833969116, "rewards/rejected": -0.04468156024813652, "step": 7630 }, { "epoch": 0.5, "learning_rate": 2.935432454045125e-06, "logits/chosen": -2.1335971355438232, "logits/rejected": -2.1970443725585938, "logps/chosen": -233.8820037841797, "logps/rejected": -216.0299835205078, "loss": 0.6916, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.019176747649908066, "rewards/margins": 0.033554740250110626, "rewards/rejected": -0.05273149162530899, "step": 7640 }, { "epoch": 0.5, "learning_rate": 2.929808688476768e-06, "logits/chosen": -2.363029956817627, "logits/rejected": -2.2410759925842285, "logps/chosen": -240.63131713867188, "logps/rejected": -230.9453887939453, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.012689086608588696, "rewards/margins": 0.07916554063558578, "rewards/rejected": -0.09185463190078735, "step": 7650 }, { "epoch": 0.5, "learning_rate": 2.924182680815748e-06, "logits/chosen": -2.2831203937530518, "logits/rejected": -2.210198402404785, "logps/chosen": -232.90261840820312, "logps/rejected": -222.2987823486328, "loss": 0.6881, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.017279820516705513, "rewards/margins": 0.12103778123855591, "rewards/rejected": -0.10375796258449554, "step": 7660 }, { "epoch": 0.5, "learning_rate": 2.9185544604100765e-06, "logits/chosen": -2.063129186630249, "logits/rejected": -1.9675689935684204, "logps/chosen": -198.81106567382812, "logps/rejected": -202.04229736328125, "loss": 0.6891, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.02900967001914978, "rewards/margins": 0.07486601918935776, "rewards/rejected": -0.10387568175792694, "step": 7670 }, { "epoch": 0.5, "learning_rate": 2.9129240566193083e-06, "logits/chosen": -2.3740992546081543, "logits/rejected": -2.0523669719696045, "logps/chosen": -202.94161987304688, "logps/rejected": -199.41842651367188, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.00861315242946148, "rewards/margins": 0.09764768928289413, "rewards/rejected": -0.10626085102558136, "step": 7680 }, { "epoch": 0.5, "learning_rate": 2.9072914988143874e-06, "logits/chosen": -2.1358678340911865, "logits/rejected": -2.0475707054138184, "logps/chosen": -201.20492553710938, "logps/rejected": -204.98117065429688, "loss": 0.6885, "rewards/accuracies": 0.75, "rewards/chosen": 0.002248757751658559, "rewards/margins": 0.12247662246227264, "rewards/rejected": -0.1202278584241867, "step": 7690 }, { "epoch": 0.5, "learning_rate": 2.9016568163774956e-06, "logits/chosen": -2.360272169113159, "logits/rejected": -2.1226887702941895, "logps/chosen": -172.92312622070312, "logps/rejected": -146.11019897460938, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": -0.006938323378562927, "rewards/margins": 0.07239948213100433, "rewards/rejected": -0.07933782041072845, "step": 7700 }, { "epoch": 0.5, "eval_logits/chosen": -2.2827038764953613, "eval_logits/rejected": -2.097726583480835, "eval_logps/chosen": -231.82516479492188, "eval_logps/rejected": -219.7780303955078, "eval_loss": 0.6899218559265137, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": 0.001797709846869111, "eval_rewards/margins": 0.08345920592546463, "eval_rewards/rejected": -0.08166150003671646, "eval_runtime": 710.0146, "eval_samples_per_second": 2.817, "eval_steps_per_second": 1.408, "step": 7700 }, { "epoch": 0.5, "learning_rate": 2.8960200387018942e-06, "logits/chosen": -2.1221325397491455, "logits/rejected": -2.0857224464416504, "logps/chosen": -308.96600341796875, "logps/rejected": -268.85888671875, "loss": 0.6925, "rewards/accuracies": 0.625, "rewards/chosen": -0.012330361641943455, "rewards/margins": 0.08727259188890457, "rewards/rejected": -0.09960294514894485, "step": 7710 }, { "epoch": 0.51, "learning_rate": 2.8903811951917792e-06, "logits/chosen": -2.2766757011413574, "logits/rejected": -2.124586582183838, "logps/chosen": -199.05517578125, "logps/rejected": -159.13063049316406, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.007754988968372345, "rewards/margins": 0.07217663526535034, "rewards/rejected": -0.07993160933256149, "step": 7720 }, { "epoch": 0.51, "learning_rate": 2.88474031526212e-06, "logits/chosen": -2.2419610023498535, "logits/rejected": -2.2114017009735107, "logps/chosen": -203.2736053466797, "logps/rejected": -222.61083984375, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.004338492639362812, "rewards/margins": 0.05908681079745293, "rewards/rejected": -0.06342529505491257, "step": 7730 }, { "epoch": 0.51, "learning_rate": 2.879097428338509e-06, "logits/chosen": -2.2317774295806885, "logits/rejected": -1.9235107898712158, "logps/chosen": -217.020751953125, "logps/rejected": -202.31373596191406, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.011257743462920189, "rewards/margins": 0.0762132927775383, "rewards/rejected": -0.08747103810310364, "step": 7740 }, { "epoch": 0.51, "learning_rate": 2.8734525638570094e-06, "logits/chosen": -2.234351634979248, "logits/rejected": -2.1596150398254395, "logps/chosen": -232.974853515625, "logps/rejected": -226.3662567138672, "loss": 0.6933, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.012193715199828148, "rewards/margins": 0.05764711648225784, "rewards/rejected": -0.04545340687036514, "step": 7750 }, { "epoch": 0.51, "learning_rate": 2.8678057512639982e-06, "logits/chosen": -2.181051254272461, "logits/rejected": -2.088076114654541, "logps/chosen": -284.4569091796875, "logps/rejected": -273.23193359375, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.016095653176307678, "rewards/margins": 0.10834445059299469, "rewards/rejected": -0.09224879741668701, "step": 7760 }, { "epoch": 0.51, "learning_rate": 2.8621570200160172e-06, "logits/chosen": -2.0607194900512695, "logits/rejected": -1.9694864749908447, "logps/chosen": -167.73159790039062, "logps/rejected": -169.00257873535156, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.008374934084713459, "rewards/margins": 0.09448956698179245, "rewards/rejected": -0.08611463010311127, "step": 7770 }, { "epoch": 0.51, "learning_rate": 2.856506399579615e-06, "logits/chosen": -2.414057493209839, "logits/rejected": -2.0493381023406982, "logps/chosen": -222.3790740966797, "logps/rejected": -209.5500946044922, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.04015577957034111, "rewards/margins": 0.06660310924053192, "rewards/rejected": -0.10675889253616333, "step": 7780 }, { "epoch": 0.51, "learning_rate": 2.8508539194311964e-06, "logits/chosen": -2.3235208988189697, "logits/rejected": -2.316335916519165, "logps/chosen": -255.4383544921875, "logps/rejected": -273.5601806640625, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.017049241811037064, "rewards/margins": 0.052917949855327606, "rewards/rejected": -0.06996718794107437, "step": 7790 }, { "epoch": 0.51, "learning_rate": 2.8451996090568656e-06, "logits/chosen": -2.2277419567108154, "logits/rejected": -2.1044132709503174, "logps/chosen": -189.9392547607422, "logps/rejected": -181.34808349609375, "loss": 0.6885, "rewards/accuracies": 0.625, "rewards/chosen": -0.03799670934677124, "rewards/margins": 0.09370444715023041, "rewards/rejected": -0.13170115649700165, "step": 7800 }, { "epoch": 0.51, "eval_logits/chosen": -2.2713305950164795, "eval_logits/rejected": -2.086493968963623, "eval_logps/chosen": -235.31703186035156, "eval_logps/rejected": -223.4753875732422, "eval_loss": 0.6899875402450562, "eval_rewards/accuracies": 0.6485000252723694, "eval_rewards/chosen": -0.03312075883150101, "eval_rewards/margins": 0.08551418036222458, "eval_rewards/rejected": -0.11863493919372559, "eval_runtime": 707.6852, "eval_samples_per_second": 2.826, "eval_steps_per_second": 1.413, "step": 7800 }, { "epoch": 0.51, "learning_rate": 2.839543497952276e-06, "logits/chosen": -2.1599411964416504, "logits/rejected": -2.2668697834014893, "logps/chosen": -189.414306640625, "logps/rejected": -190.27862548828125, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04804200679063797, "rewards/margins": 0.09568199515342712, "rewards/rejected": -0.1437240093946457, "step": 7810 }, { "epoch": 0.51, "learning_rate": 2.833885615622474e-06, "logits/chosen": -2.2124152183532715, "logits/rejected": -2.0647387504577637, "logps/chosen": -208.2938995361328, "logps/rejected": -225.68045043945312, "loss": 0.6928, "rewards/accuracies": 0.625, "rewards/chosen": -0.05718092992901802, "rewards/margins": 0.07911114394664764, "rewards/rejected": -0.13629207015037537, "step": 7820 }, { "epoch": 0.51, "learning_rate": 2.8282259915817454e-06, "logits/chosen": -1.902604341506958, "logits/rejected": -2.096595287322998, "logps/chosen": -144.83163452148438, "logps/rejected": -194.21328735351562, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.038869358599185944, "rewards/margins": 0.09170033037662506, "rewards/rejected": -0.1305696964263916, "step": 7830 }, { "epoch": 0.51, "learning_rate": 2.8225646553534614e-06, "logits/chosen": -2.0661423206329346, "logits/rejected": -1.9575055837631226, "logps/chosen": -201.17019653320312, "logps/rejected": -204.97335815429688, "loss": 0.6915, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.01854178123176098, "rewards/margins": 0.07094518840312958, "rewards/rejected": -0.0894869714975357, "step": 7840 }, { "epoch": 0.51, "learning_rate": 2.8169016364699255e-06, "logits/chosen": -2.2641472816467285, "logits/rejected": -1.9965393543243408, "logps/chosen": -217.72933959960938, "logps/rejected": -225.2642822265625, "loss": 0.6926, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05841469764709473, "rewards/margins": 0.057847362011671066, "rewards/rejected": -0.1162620559334755, "step": 7850 }, { "epoch": 0.51, "learning_rate": 2.811236964472217e-06, "logits/chosen": -2.3709425926208496, "logits/rejected": -2.0033254623413086, "logps/chosen": -314.4047546386719, "logps/rejected": -261.5574035644531, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": -0.03998160362243652, "rewards/margins": 0.07272603362798691, "rewards/rejected": -0.11270763725042343, "step": 7860 }, { "epoch": 0.51, "learning_rate": 2.805570668910041e-06, "logits/chosen": -2.0802268981933594, "logits/rejected": -2.0542476177215576, "logps/chosen": -177.2976837158203, "logps/rejected": -247.8351593017578, "loss": 0.6897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07658363878726959, "rewards/margins": 0.07037156820297241, "rewards/rejected": -0.146955206990242, "step": 7870 }, { "epoch": 0.52, "learning_rate": 2.7999027793415695e-06, "logits/chosen": -2.464724063873291, "logits/rejected": -1.99410879611969, "logps/chosen": -250.383544921875, "logps/rejected": -210.59585571289062, "loss": 0.6918, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.054675567895174026, "rewards/margins": 0.051379382610321045, "rewards/rejected": -0.10605494678020477, "step": 7880 }, { "epoch": 0.52, "learning_rate": 2.794233325333293e-06, "logits/chosen": -2.1549906730651855, "logits/rejected": -2.0477986335754395, "logps/chosen": -261.9752502441406, "logps/rejected": -247.65414428710938, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03693775087594986, "rewards/margins": 0.09356243908405304, "rewards/rejected": -0.1305001974105835, "step": 7890 }, { "epoch": 0.52, "learning_rate": 2.7885623364598597e-06, "logits/chosen": -2.3811306953430176, "logits/rejected": -2.0712475776672363, "logps/chosen": -270.0716247558594, "logps/rejected": -237.21182250976562, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06379027664661407, "rewards/margins": 0.09059080481529236, "rewards/rejected": -0.15438108146190643, "step": 7900 }, { "epoch": 0.52, "eval_logits/chosen": -2.269857883453369, "eval_logits/rejected": -2.085240125656128, "eval_logps/chosen": -236.7635498046875, "eval_logps/rejected": -224.1826629638672, "eval_loss": 0.6899347901344299, "eval_rewards/accuracies": 0.6424999833106995, "eval_rewards/chosen": -0.047585878521203995, "eval_rewards/margins": 0.07812169939279556, "eval_rewards/rejected": -0.12570756673812866, "eval_runtime": 710.7257, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 7900 }, { "epoch": 0.52, "learning_rate": 2.782889842303926e-06, "logits/chosen": -2.2479918003082275, "logits/rejected": -2.0780441761016846, "logps/chosen": -169.74075317382812, "logps/rejected": -166.49923706054688, "loss": 0.6938, "rewards/accuracies": 0.5, "rewards/chosen": -0.09511653333902359, "rewards/margins": 0.022563491016626358, "rewards/rejected": -0.11768001317977905, "step": 7910 }, { "epoch": 0.52, "learning_rate": 2.7772158724559987e-06, "logits/chosen": -2.092353105545044, "logits/rejected": -1.937853217124939, "logps/chosen": -221.2228546142578, "logps/rejected": -273.72015380859375, "loss": 0.6844, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.041138116270303726, "rewards/margins": 0.15026527643203735, "rewards/rejected": -0.1914033740758896, "step": 7920 }, { "epoch": 0.52, "learning_rate": 2.7715404565142856e-06, "logits/chosen": -2.250084161758423, "logits/rejected": -2.1173858642578125, "logps/chosen": -204.4228515625, "logps/rejected": -204.76995849609375, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.0445866696536541, "rewards/margins": 0.06569734215736389, "rewards/rejected": -0.11028401553630829, "step": 7930 }, { "epoch": 0.52, "learning_rate": 2.7658636240845354e-06, "logits/chosen": -2.4091989994049072, "logits/rejected": -2.306553363800049, "logps/chosen": -226.8249969482422, "logps/rejected": -250.21707153320312, "loss": 0.6904, "rewards/accuracies": 0.75, "rewards/chosen": -0.03376641497015953, "rewards/margins": 0.08774002641439438, "rewards/rejected": -0.12150643765926361, "step": 7940 }, { "epoch": 0.52, "learning_rate": 2.7601854047798872e-06, "logits/chosen": -2.1954774856567383, "logits/rejected": -2.236949920654297, "logps/chosen": -228.4868621826172, "logps/rejected": -254.75991821289062, "loss": 0.6892, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.045634832233190536, "rewards/margins": 0.06857772916555405, "rewards/rejected": -0.11421255767345428, "step": 7950 }, { "epoch": 0.52, "learning_rate": 2.7545058282207148e-06, "logits/chosen": -2.3288655281066895, "logits/rejected": -1.9428226947784424, "logps/chosen": -215.27224731445312, "logps/rejected": -191.93368530273438, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03730421140789986, "rewards/margins": 0.07240404933691025, "rewards/rejected": -0.1097082644701004, "step": 7960 }, { "epoch": 0.52, "learning_rate": 2.748824924034471e-06, "logits/chosen": -2.2552783489227295, "logits/rejected": -2.120013475418091, "logps/chosen": -226.8047637939453, "logps/rejected": -217.0663299560547, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07394564896821976, "rewards/margins": 0.09158217161893845, "rewards/rejected": -0.1655278354883194, "step": 7970 }, { "epoch": 0.52, "learning_rate": 2.743142721855536e-06, "logits/chosen": -2.1140682697296143, "logits/rejected": -2.0926127433776855, "logps/chosen": -157.36866760253906, "logps/rejected": -154.32342529296875, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05508549138903618, "rewards/margins": 0.053032286465168, "rewards/rejected": -0.10811777412891388, "step": 7980 }, { "epoch": 0.52, "learning_rate": 2.737459251325058e-06, "logits/chosen": -2.23268985748291, "logits/rejected": -2.1779227256774902, "logps/chosen": -271.97705078125, "logps/rejected": -255.03189086914062, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": -0.020035061985254288, "rewards/margins": 0.055861860513687134, "rewards/rejected": -0.07589691877365112, "step": 7990 }, { "epoch": 0.52, "learning_rate": 2.731774542090804e-06, "logits/chosen": -2.1980490684509277, "logits/rejected": -1.7955074310302734, "logps/chosen": -196.6571044921875, "logps/rejected": -182.37567138671875, "loss": 0.6911, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.03881066292524338, "rewards/margins": 0.04753577709197998, "rewards/rejected": -0.08634644001722336, "step": 8000 }, { "epoch": 0.52, "eval_logits/chosen": -2.265841245651245, "eval_logits/rejected": -2.081429958343506, "eval_logps/chosen": -235.2987060546875, "eval_logps/rejected": -223.01144409179688, "eval_loss": 0.6899079084396362, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.03293740004301071, "eval_rewards/margins": 0.08105786144733429, "eval_rewards/rejected": -0.113995261490345, "eval_runtime": 710.8315, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 8000 }, { "epoch": 0.52, "learning_rate": 2.7260886238070034e-06, "logits/chosen": -2.271594524383545, "logits/rejected": -2.1849944591522217, "logps/chosen": -198.82374572753906, "logps/rejected": -202.0972442626953, "loss": 0.6911, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.023537836968898773, "rewards/margins": 0.07981632649898529, "rewards/rejected": -0.10335417091846466, "step": 8010 }, { "epoch": 0.52, "learning_rate": 2.72040152613419e-06, "logits/chosen": -2.2961008548736572, "logits/rejected": -1.8004083633422852, "logps/chosen": -219.9088897705078, "logps/rejected": -149.8590087890625, "loss": 0.6852, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03033808246254921, "rewards/margins": 0.13175645470619202, "rewards/rejected": -0.16209453344345093, "step": 8020 }, { "epoch": 0.53, "learning_rate": 2.7147132787390516e-06, "logits/chosen": -2.286135196685791, "logits/rejected": -1.993798851966858, "logps/chosen": -229.1460418701172, "logps/rejected": -220.80264282226562, "loss": 0.6912, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.01667933538556099, "rewards/margins": 0.08642159402370453, "rewards/rejected": -0.10310093313455582, "step": 8030 }, { "epoch": 0.53, "learning_rate": 2.709023911294273e-06, "logits/chosen": -2.374183177947998, "logits/rejected": -1.9083032608032227, "logps/chosen": -242.0371551513672, "logps/rejected": -225.60302734375, "loss": 0.6868, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.002816893858835101, "rewards/margins": 0.14465411007404327, "rewards/rejected": -0.1418372094631195, "step": 8040 }, { "epoch": 0.53, "learning_rate": 2.7033334534783806e-06, "logits/chosen": -2.263575792312622, "logits/rejected": -2.3582472801208496, "logps/chosen": -200.14503479003906, "logps/rejected": -225.3608856201172, "loss": 0.6891, "rewards/accuracies": 0.5, "rewards/chosen": -0.009991501457989216, "rewards/margins": 0.09016770124435425, "rewards/rejected": -0.10015920549631119, "step": 8050 }, { "epoch": 0.53, "learning_rate": 2.697641934975592e-06, "logits/chosen": -2.2752552032470703, "logits/rejected": -2.050177574157715, "logps/chosen": -229.1072998046875, "logps/rejected": -205.2366180419922, "loss": 0.6883, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.013208845630288124, "rewards/margins": 0.08683110773563385, "rewards/rejected": -0.10003993660211563, "step": 8060 }, { "epoch": 0.53, "learning_rate": 2.691949385475654e-06, "logits/chosen": -2.3117451667785645, "logits/rejected": -2.063112258911133, "logps/chosen": -246.4084930419922, "logps/rejected": -229.3635711669922, "loss": 0.6892, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02517673373222351, "rewards/margins": 0.0852632075548172, "rewards/rejected": -0.11043993383646011, "step": 8070 }, { "epoch": 0.53, "learning_rate": 2.6862558346736937e-06, "logits/chosen": -2.239243268966675, "logits/rejected": -2.0910251140594482, "logps/chosen": -241.4723663330078, "logps/rejected": -251.2477264404297, "loss": 0.6856, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.031201153993606567, "rewards/margins": 0.14977136254310608, "rewards/rejected": -0.18097251653671265, "step": 8080 }, { "epoch": 0.53, "learning_rate": 2.6805613122700617e-06, "logits/chosen": -2.282254457473755, "logits/rejected": -1.951345682144165, "logps/chosen": -227.823486328125, "logps/rejected": -238.18466186523438, "loss": 0.688, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04250997677445412, "rewards/margins": 0.09419043362140656, "rewards/rejected": -0.13670040667057037, "step": 8090 }, { "epoch": 0.53, "learning_rate": 2.674865847970176e-06, "logits/chosen": -2.219407320022583, "logits/rejected": -1.950874924659729, "logps/chosen": -209.02536010742188, "logps/rejected": -239.494384765625, "loss": 0.6915, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.014089837670326233, "rewards/margins": 0.08020684868097305, "rewards/rejected": -0.09429670870304108, "step": 8100 }, { "epoch": 0.53, "eval_logits/chosen": -2.2729127407073975, "eval_logits/rejected": -2.08774733543396, "eval_logps/chosen": -233.5811309814453, "eval_logps/rejected": -221.25350952148438, "eval_loss": 0.6898881793022156, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.01576184667646885, "eval_rewards/margins": 0.08065415918827057, "eval_rewards/rejected": -0.09641600400209427, "eval_runtime": 710.7109, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 8100 }, { "epoch": 0.53, "learning_rate": 2.669169471484368e-06, "logits/chosen": -2.0301496982574463, "logits/rejected": -2.0818302631378174, "logps/chosen": -168.10691833496094, "logps/rejected": -169.706298828125, "loss": 0.6909, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.03048858605325222, "rewards/margins": 0.04239571467041969, "rewards/rejected": -0.07288429886102676, "step": 8110 }, { "epoch": 0.53, "learning_rate": 2.6634722125277278e-06, "logits/chosen": -2.373579502105713, "logits/rejected": -2.051607370376587, "logps/chosen": -235.17337036132812, "logps/rejected": -251.06521606445312, "loss": 0.6887, "rewards/accuracies": 0.75, "rewards/chosen": -0.030534693971276283, "rewards/margins": 0.076970174908638, "rewards/rejected": -0.10750486701726913, "step": 8120 }, { "epoch": 0.53, "learning_rate": 2.6577741008199498e-06, "logits/chosen": -2.2919061183929443, "logits/rejected": -1.9393638372421265, "logps/chosen": -256.58197021484375, "logps/rejected": -211.50302124023438, "loss": 0.6866, "rewards/accuracies": 0.75, "rewards/chosen": -0.0042382776737213135, "rewards/margins": 0.1496235430240631, "rewards/rejected": -0.15386183559894562, "step": 8130 }, { "epoch": 0.53, "learning_rate": 2.652075166085175e-06, "logits/chosen": -2.1796364784240723, "logits/rejected": -2.135631561279297, "logps/chosen": -235.9457244873047, "logps/rejected": -273.9045715332031, "loss": 0.6874, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.013664024882018566, "rewards/margins": 0.14413678646087646, "rewards/rejected": -0.1578008234500885, "step": 8140 }, { "epoch": 0.53, "learning_rate": 2.6463754380518395e-06, "logits/chosen": -2.159858465194702, "logits/rejected": -1.9525247812271118, "logps/chosen": -236.5515899658203, "logps/rejected": -190.86343383789062, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": -0.05593949556350708, "rewards/margins": 0.09138914197683334, "rewards/rejected": -0.14732863008975983, "step": 8150 }, { "epoch": 0.53, "learning_rate": 2.6406749464525167e-06, "logits/chosen": -2.2781941890716553, "logits/rejected": -1.997957468032837, "logps/chosen": -233.92446899414062, "logps/rejected": -196.24688720703125, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0036535251419991255, "rewards/margins": 0.09554257988929749, "rewards/rejected": -0.09919609874486923, "step": 8160 }, { "epoch": 0.53, "learning_rate": 2.634973721023762e-06, "logits/chosen": -2.3184168338775635, "logits/rejected": -2.161243200302124, "logps/chosen": -258.32891845703125, "logps/rejected": -215.8189239501953, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.03866691142320633, "rewards/margins": 0.06928583234548569, "rewards/rejected": -0.10795273631811142, "step": 8170 }, { "epoch": 0.54, "learning_rate": 2.6292717915059605e-06, "logits/chosen": -2.3634932041168213, "logits/rejected": -2.096513032913208, "logps/chosen": -278.8802490234375, "logps/rejected": -233.5529022216797, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03250148147344589, "rewards/margins": 0.10865737497806549, "rewards/rejected": -0.1411588490009308, "step": 8180 }, { "epoch": 0.54, "learning_rate": 2.6235691876431706e-06, "logits/chosen": -2.171806812286377, "logits/rejected": -2.2218527793884277, "logps/chosen": -221.06301879882812, "logps/rejected": -234.08663940429688, "loss": 0.6891, "rewards/accuracies": 0.625, "rewards/chosen": -0.02447427250444889, "rewards/margins": 0.07424478232860565, "rewards/rejected": -0.09871906042098999, "step": 8190 }, { "epoch": 0.54, "learning_rate": 2.6178659391829673e-06, "logits/chosen": -2.3931174278259277, "logits/rejected": -2.1151492595672607, "logps/chosen": -236.8017120361328, "logps/rejected": -204.3024139404297, "loss": 0.6907, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.006762659642845392, "rewards/margins": 0.08343976736068726, "rewards/rejected": -0.0766771137714386, "step": 8200 }, { "epoch": 0.54, "eval_logits/chosen": -2.2691376209259033, "eval_logits/rejected": -2.084272861480713, "eval_logps/chosen": -234.5026397705078, "eval_logps/rejected": -222.2466278076172, "eval_loss": 0.6898766160011292, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": -0.02497694082558155, "eval_rewards/margins": 0.08137031644582748, "eval_rewards/rejected": -0.10634726285934448, "eval_runtime": 711.535, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 8200 }, { "epoch": 0.54, "learning_rate": 2.6121620758762877e-06, "logits/chosen": -2.2570462226867676, "logits/rejected": -2.002037525177002, "logps/chosen": -194.84579467773438, "logps/rejected": -200.37400817871094, "loss": 0.6911, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.027614828199148178, "rewards/margins": 0.06327278167009354, "rewards/rejected": -0.09088762104511261, "step": 8210 }, { "epoch": 0.54, "learning_rate": 2.606457627477277e-06, "logits/chosen": -2.1911864280700684, "logits/rejected": -2.134552478790283, "logps/chosen": -176.81307983398438, "logps/rejected": -189.58029174804688, "loss": 0.6916, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.011837871745228767, "rewards/margins": 0.09415124356746674, "rewards/rejected": -0.10598911345005035, "step": 8220 }, { "epoch": 0.54, "learning_rate": 2.6007526237431324e-06, "logits/chosen": -2.330580234527588, "logits/rejected": -2.279081344604492, "logps/chosen": -182.9707489013672, "logps/rejected": -206.08935546875, "loss": 0.6893, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.009412150830030441, "rewards/margins": 0.0900057703256607, "rewards/rejected": -0.09941791743040085, "step": 8230 }, { "epoch": 0.54, "learning_rate": 2.5950470944339478e-06, "logits/chosen": -2.110105514526367, "logits/rejected": -2.1718857288360596, "logps/chosen": -220.61978149414062, "logps/rejected": -220.62734985351562, "loss": 0.6922, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01182152982801199, "rewards/margins": 0.03503318130970001, "rewards/rejected": -0.023211652413010597, "step": 8240 }, { "epoch": 0.54, "learning_rate": 2.58934106931256e-06, "logits/chosen": -2.2483153343200684, "logits/rejected": -1.9538730382919312, "logps/chosen": -221.7985382080078, "logps/rejected": -208.62557983398438, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": -0.010264934040606022, "rewards/margins": 0.06004582718014717, "rewards/rejected": -0.07031075656414032, "step": 8250 }, { "epoch": 0.54, "learning_rate": 2.58363457814439e-06, "logits/chosen": -2.243074417114258, "logits/rejected": -1.955980896949768, "logps/chosen": -214.344970703125, "logps/rejected": -209.1015625, "loss": 0.6878, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03673034533858299, "rewards/margins": 0.0848006159067154, "rewards/rejected": -0.12153096497058868, "step": 8260 }, { "epoch": 0.54, "learning_rate": 2.5779276506972924e-06, "logits/chosen": -2.2136752605438232, "logits/rejected": -2.1928346157073975, "logps/chosen": -233.85415649414062, "logps/rejected": -200.51458740234375, "loss": 0.6912, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.019013002514839172, "rewards/margins": 0.0604260191321373, "rewards/rejected": -0.07943902909755707, "step": 8270 }, { "epoch": 0.54, "learning_rate": 2.5722203167413945e-06, "logits/chosen": -2.336066961288452, "logits/rejected": -2.0090115070343018, "logps/chosen": -284.7717590332031, "logps/rejected": -210.93600463867188, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.014740842394530773, "rewards/margins": 0.08085087686777115, "rewards/rejected": -0.0955917239189148, "step": 8280 }, { "epoch": 0.54, "learning_rate": 2.5665126060489476e-06, "logits/chosen": -2.30047607421875, "logits/rejected": -2.150911569595337, "logps/chosen": -190.69834899902344, "logps/rejected": -225.66976928710938, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02596314251422882, "rewards/margins": 0.06124822050333023, "rewards/rejected": -0.08721135556697845, "step": 8290 }, { "epoch": 0.54, "learning_rate": 2.560804548394165e-06, "logits/chosen": -2.222855567932129, "logits/rejected": -1.9643363952636719, "logps/chosen": -249.7584228515625, "logps/rejected": -214.04666137695312, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02478059008717537, "rewards/margins": 0.0956047847867012, "rewards/rejected": -0.12038537114858627, "step": 8300 }, { "epoch": 0.54, "eval_logits/chosen": -2.2778053283691406, "eval_logits/rejected": -2.0923290252685547, "eval_logps/chosen": -232.2015380859375, "eval_logps/rejected": -219.4079132080078, "eval_loss": 0.6899590492248535, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.0019656550139188766, "eval_rewards/margins": 0.07599426060914993, "eval_rewards/rejected": -0.07795991748571396, "eval_runtime": 710.1344, "eval_samples_per_second": 2.816, "eval_steps_per_second": 1.408, "step": 8300 }, { "epoch": 0.54, "learning_rate": 2.5550961735530734e-06, "logits/chosen": -2.106081008911133, "logits/rejected": -2.282960891723633, "logps/chosen": -161.3715362548828, "logps/rejected": -198.7847900390625, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": 0.016642851755023003, "rewards/margins": 0.06822551786899567, "rewards/rejected": -0.051582664251327515, "step": 8310 }, { "epoch": 0.54, "learning_rate": 2.549387511303351e-06, "logits/chosen": -2.265373706817627, "logits/rejected": -2.3061885833740234, "logps/chosen": -168.9114990234375, "logps/rejected": -219.42587280273438, "loss": 0.6896, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.008253363892436028, "rewards/margins": 0.062337376177310944, "rewards/rejected": -0.05408401042222977, "step": 8320 }, { "epoch": 0.55, "learning_rate": 2.5436785914241774e-06, "logits/chosen": -2.2159152030944824, "logits/rejected": -2.2393479347229004, "logps/chosen": -200.00836181640625, "logps/rejected": -181.49374389648438, "loss": 0.6869, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.01049137394875288, "rewards/margins": 0.1345503181219101, "rewards/rejected": -0.14504170417785645, "step": 8330 }, { "epoch": 0.55, "learning_rate": 2.5379694436960746e-06, "logits/chosen": -2.3889286518096924, "logits/rejected": -2.1922972202301025, "logps/chosen": -243.3367919921875, "logps/rejected": -261.3111877441406, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.026742050424218178, "rewards/margins": 0.07409554719924927, "rewards/rejected": -0.047353483736515045, "step": 8340 }, { "epoch": 0.55, "learning_rate": 2.5322600979007533e-06, "logits/chosen": -2.403104305267334, "logits/rejected": -2.162173271179199, "logps/chosen": -212.3261260986328, "logps/rejected": -199.4026641845703, "loss": 0.6898, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0016930897254496813, "rewards/margins": 0.07775741815567017, "rewards/rejected": -0.0760643258690834, "step": 8350 }, { "epoch": 0.55, "learning_rate": 2.5265505838209592e-06, "logits/chosen": -2.4180874824523926, "logits/rejected": -2.0715489387512207, "logps/chosen": -256.7772521972656, "logps/rejected": -215.9575958251953, "loss": 0.6926, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0018636692548170686, "rewards/margins": 0.055936507880687714, "rewards/rejected": -0.05407283455133438, "step": 8360 }, { "epoch": 0.55, "learning_rate": 2.520840931240314e-06, "logits/chosen": -2.448770046234131, "logits/rejected": -1.9609102010726929, "logps/chosen": -208.07290649414062, "logps/rejected": -152.3336639404297, "loss": 0.6915, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.017356865108013153, "rewards/margins": 0.07981632649898529, "rewards/rejected": -0.06245948001742363, "step": 8370 }, { "epoch": 0.55, "learning_rate": 2.515131169943162e-06, "logits/chosen": -1.9940284490585327, "logits/rejected": -2.0735738277435303, "logps/chosen": -258.408203125, "logps/rejected": -259.9877624511719, "loss": 0.6911, "rewards/accuracies": 0.75, "rewards/chosen": -0.0031367135234177113, "rewards/margins": 0.08551900088787079, "rewards/rejected": -0.08865571022033691, "step": 8380 }, { "epoch": 0.55, "learning_rate": 2.509421329714416e-06, "logits/chosen": -2.1275012493133545, "logits/rejected": -2.1602554321289062, "logps/chosen": -206.140869140625, "logps/rejected": -231.0389862060547, "loss": 0.6922, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.6808509826660156e-05, "rewards/margins": 0.03706061840057373, "rewards/rejected": -0.03707743063569069, "step": 8390 }, { "epoch": 0.55, "learning_rate": 2.5037114403393987e-06, "logits/chosen": -2.247596263885498, "logits/rejected": -1.9996188879013062, "logps/chosen": -209.26681518554688, "logps/rejected": -182.84060668945312, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": 0.017135795205831528, "rewards/margins": 0.07354326546192169, "rewards/rejected": -0.05640747398138046, "step": 8400 }, { "epoch": 0.55, "eval_logits/chosen": -2.2804696559906006, "eval_logits/rejected": -2.095289468765259, "eval_logps/chosen": -230.7716522216797, "eval_logps/rejected": -217.13856506347656, "eval_loss": 0.6900351643562317, "eval_rewards/accuracies": 0.6294999718666077, "eval_rewards/chosen": 0.012332833372056484, "eval_rewards/margins": 0.0675993338227272, "eval_rewards/rejected": -0.055266499519348145, "eval_runtime": 713.3682, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 8400 }, { "epoch": 0.55, "learning_rate": 2.4980015316036908e-06, "logits/chosen": -2.116654872894287, "logits/rejected": -2.1679673194885254, "logps/chosen": -173.55227661132812, "logps/rejected": -206.0382080078125, "loss": 0.6871, "rewards/accuracies": 0.625, "rewards/chosen": 0.022574327886104584, "rewards/margins": 0.10900095850229263, "rewards/rejected": -0.08642663061618805, "step": 8410 }, { "epoch": 0.55, "learning_rate": 2.4922916332929725e-06, "logits/chosen": -2.4510018825531006, "logits/rejected": -2.1898789405822754, "logps/chosen": -234.3470916748047, "logps/rejected": -197.39511108398438, "loss": 0.6919, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.010771475732326508, "rewards/margins": 0.02662494219839573, "rewards/rejected": -0.015853462740778923, "step": 8420 }, { "epoch": 0.55, "learning_rate": 2.4865817751928716e-06, "logits/chosen": -2.1895623207092285, "logits/rejected": -2.174008369445801, "logps/chosen": -193.5983123779297, "logps/rejected": -231.7257537841797, "loss": 0.6863, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.018903840333223343, "rewards/margins": 0.11705265939235687, "rewards/rejected": -0.09814882278442383, "step": 8430 }, { "epoch": 0.55, "learning_rate": 2.4808719870888037e-06, "logits/chosen": -2.0574288368225098, "logits/rejected": -1.983668565750122, "logps/chosen": -216.3809356689453, "logps/rejected": -193.36599731445312, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.012905344367027283, "rewards/margins": 0.10258817672729492, "rewards/rejected": -0.08968283236026764, "step": 8440 }, { "epoch": 0.55, "learning_rate": 2.4751622987658206e-06, "logits/chosen": -2.475071430206299, "logits/rejected": -2.2941946983337402, "logps/chosen": -235.42440795898438, "logps/rejected": -230.3686065673828, "loss": 0.6918, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.025258636102080345, "rewards/margins": 0.05684860795736313, "rewards/rejected": -0.03158997744321823, "step": 8450 }, { "epoch": 0.55, "learning_rate": 2.4694527400084546e-06, "logits/chosen": -2.25466251373291, "logits/rejected": -2.161506175994873, "logps/chosen": -222.21908569335938, "logps/rejected": -224.0065155029297, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": 0.0002555936516728252, "rewards/margins": 0.07632071524858475, "rewards/rejected": -0.07606511563062668, "step": 8460 }, { "epoch": 0.55, "learning_rate": 2.4637433406005607e-06, "logits/chosen": -2.4585928916931152, "logits/rejected": -2.344909191131592, "logps/chosen": -310.13995361328125, "logps/rejected": -286.87567138671875, "loss": 0.691, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.004136279225349426, "rewards/margins": 0.04778838902711868, "rewards/rejected": -0.05192466825246811, "step": 8470 }, { "epoch": 0.55, "learning_rate": 2.4580341303251628e-06, "logits/chosen": -2.2639238834381104, "logits/rejected": -2.002631425857544, "logps/chosen": -259.3542175292969, "logps/rejected": -229.9150848388672, "loss": 0.6899, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005969688296318054, "rewards/margins": 0.06959638744592667, "rewards/rejected": -0.06362669169902802, "step": 8480 }, { "epoch": 0.56, "learning_rate": 2.4523251389642984e-06, "logits/chosen": -2.16398024559021, "logits/rejected": -2.036417007446289, "logps/chosen": -256.45709228515625, "logps/rejected": -229.12576293945312, "loss": 0.6877, "rewards/accuracies": 0.625, "rewards/chosen": -0.018977751955389977, "rewards/margins": 0.09807170927524567, "rewards/rejected": -0.11704947054386139, "step": 8490 }, { "epoch": 0.56, "learning_rate": 2.4466163962988626e-06, "logits/chosen": -2.480299711227417, "logits/rejected": -2.1086299419403076, "logps/chosen": -281.618896484375, "logps/rejected": -193.210205078125, "loss": 0.6885, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.010179015807807446, "rewards/margins": 0.10310627520084381, "rewards/rejected": -0.11328530311584473, "step": 8500 }, { "epoch": 0.56, "eval_logits/chosen": -2.2819478511810303, "eval_logits/rejected": -2.0963072776794434, "eval_logps/chosen": -231.94549560546875, "eval_logps/rejected": -220.13168334960938, "eval_loss": 0.689839780330658, "eval_rewards/accuracies": 0.6455000042915344, "eval_rewards/chosen": 0.0005945622688159347, "eval_rewards/margins": 0.08579233288764954, "eval_rewards/rejected": -0.08519777655601501, "eval_runtime": 712.4374, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 8500 }, { "epoch": 0.56, "learning_rate": 2.4409079321084543e-06, "logits/chosen": -2.2277088165283203, "logits/rejected": -2.284764051437378, "logps/chosen": -213.2277374267578, "logps/rejected": -252.33645629882812, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.01891925372183323, "rewards/margins": 0.0916977971792221, "rewards/rejected": -0.07277854532003403, "step": 8510 }, { "epoch": 0.56, "learning_rate": 2.4351997761712184e-06, "logits/chosen": -2.4851880073547363, "logits/rejected": -2.031656265258789, "logps/chosen": -244.4697265625, "logps/rejected": -189.30319213867188, "loss": 0.6895, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.009185681119561195, "rewards/margins": 0.11539344489574432, "rewards/rejected": -0.10620777308940887, "step": 8520 }, { "epoch": 0.56, "learning_rate": 2.4294919582636933e-06, "logits/chosen": -2.274225950241089, "logits/rejected": -2.129521369934082, "logps/chosen": -209.3656768798828, "logps/rejected": -206.8007354736328, "loss": 0.6918, "rewards/accuracies": 0.625, "rewards/chosen": 0.017368298023939133, "rewards/margins": 0.09274449944496155, "rewards/rejected": -0.07537619769573212, "step": 8530 }, { "epoch": 0.56, "learning_rate": 2.423784508160652e-06, "logits/chosen": -2.352238655090332, "logits/rejected": -2.100398540496826, "logps/chosen": -256.19207763671875, "logps/rejected": -215.8179168701172, "loss": 0.6912, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.009613009169697762, "rewards/margins": 0.07463245093822479, "rewards/rejected": -0.084245465695858, "step": 8540 }, { "epoch": 0.56, "learning_rate": 2.418077455634951e-06, "logits/chosen": -2.176546573638916, "logits/rejected": -2.22251558303833, "logps/chosen": -218.07138061523438, "logps/rejected": -250.1627655029297, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.015002986416220665, "rewards/margins": 0.045342180877923965, "rewards/rejected": -0.06034516543149948, "step": 8550 }, { "epoch": 0.56, "learning_rate": 2.4123708304573714e-06, "logits/chosen": -2.3653371334075928, "logits/rejected": -2.2171475887298584, "logps/chosen": -288.91107177734375, "logps/rejected": -280.00177001953125, "loss": 0.6899, "rewards/accuracies": 0.75, "rewards/chosen": 0.008562428876757622, "rewards/margins": 0.07789406925439835, "rewards/rejected": -0.06933163106441498, "step": 8560 }, { "epoch": 0.56, "learning_rate": 2.406664662396465e-06, "logits/chosen": -2.1397430896759033, "logits/rejected": -1.9881635904312134, "logps/chosen": -188.5435333251953, "logps/rejected": -179.0817108154297, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04161657765507698, "rewards/margins": 0.06220381706953049, "rewards/rejected": -0.10382040590047836, "step": 8570 }, { "epoch": 0.56, "learning_rate": 2.4009589812184012e-06, "logits/chosen": -2.3080785274505615, "logits/rejected": -1.9249913692474365, "logps/chosen": -205.11972045898438, "logps/rejected": -160.22409057617188, "loss": 0.6887, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.018998144194483757, "rewards/margins": 0.08215345442295074, "rewards/rejected": -0.10115160048007965, "step": 8580 }, { "epoch": 0.56, "learning_rate": 2.3952538166868073e-06, "logits/chosen": -2.0843875408172607, "logits/rejected": -2.144876480102539, "logps/chosen": -218.38809204101562, "logps/rejected": -214.17666625976562, "loss": 0.6871, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.008417466655373573, "rewards/margins": 0.1238173246383667, "rewards/rejected": -0.13223478198051453, "step": 8590 }, { "epoch": 0.56, "learning_rate": 2.389549198562616e-06, "logits/chosen": -2.282944917678833, "logits/rejected": -1.855536699295044, "logps/chosen": -225.4339599609375, "logps/rejected": -205.1066131591797, "loss": 0.6889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0064767589792609215, "rewards/margins": 0.09652809798717499, "rewards/rejected": -0.09005134552717209, "step": 8600 }, { "epoch": 0.56, "eval_logits/chosen": -2.289522886276245, "eval_logits/rejected": -2.1033294200897217, "eval_logps/chosen": -232.30738830566406, "eval_logps/rejected": -220.40335083007812, "eval_loss": 0.6898036003112793, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.0030244227964431047, "eval_rewards/margins": 0.08489015698432922, "eval_rewards/rejected": -0.08791457116603851, "eval_runtime": 714.176, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 8600 }, { "epoch": 0.56, "learning_rate": 2.3838451566039098e-06, "logits/chosen": -2.309410333633423, "logits/rejected": -2.1341471672058105, "logps/chosen": -240.350830078125, "logps/rejected": -234.599365234375, "loss": 0.6922, "rewards/accuracies": 0.625, "rewards/chosen": -0.018408339470624924, "rewards/margins": 0.04382626712322235, "rewards/rejected": -0.062234602868556976, "step": 8610 }, { "epoch": 0.56, "learning_rate": 2.3781417205657662e-06, "logits/chosen": -2.3165881633758545, "logits/rejected": -2.01545786857605, "logps/chosen": -197.41787719726562, "logps/rejected": -167.50404357910156, "loss": 0.6907, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.003385829506441951, "rewards/margins": 0.08293353766202927, "rewards/rejected": -0.0795477032661438, "step": 8620 }, { "epoch": 0.56, "learning_rate": 2.3724389202001006e-06, "logits/chosen": -2.3407962322235107, "logits/rejected": -2.0748586654663086, "logps/chosen": -203.34979248046875, "logps/rejected": -187.19520568847656, "loss": 0.692, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.010263567790389061, "rewards/margins": 0.055420707911252975, "rewards/rejected": -0.06568428128957748, "step": 8630 }, { "epoch": 0.57, "learning_rate": 2.366736785255514e-06, "logits/chosen": -2.227527141571045, "logits/rejected": -2.1566596031188965, "logps/chosen": -200.77955627441406, "logps/rejected": -196.5257110595703, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.020507413893938065, "rewards/margins": 0.06966142356395721, "rewards/rejected": -0.09016883373260498, "step": 8640 }, { "epoch": 0.57, "learning_rate": 2.3610353454771355e-06, "logits/chosen": -2.123077869415283, "logits/rejected": -2.05281925201416, "logps/chosen": -190.9486083984375, "logps/rejected": -181.61386108398438, "loss": 0.6914, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.01001174096018076, "rewards/margins": 0.06959837675094604, "rewards/rejected": -0.07961011677980423, "step": 8650 }, { "epoch": 0.57, "learning_rate": 2.355334630606467e-06, "logits/chosen": -2.493744373321533, "logits/rejected": -2.0158677101135254, "logps/chosen": -240.03988647460938, "logps/rejected": -183.7025146484375, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0025037326849997044, "rewards/margins": 0.07208188623189926, "rewards/rejected": -0.06957816332578659, "step": 8660 }, { "epoch": 0.57, "learning_rate": 2.349634670381231e-06, "logits/chosen": -2.0954480171203613, "logits/rejected": -2.0449440479278564, "logps/chosen": -208.32308959960938, "logps/rejected": -224.8218231201172, "loss": 0.6906, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.01123981736600399, "rewards/margins": 0.0670008659362793, "rewards/rejected": -0.07824068516492844, "step": 8670 }, { "epoch": 0.57, "learning_rate": 2.3439354945352104e-06, "logits/chosen": -2.341536045074463, "logits/rejected": -2.278677463531494, "logps/chosen": -245.1410675048828, "logps/rejected": -203.81253051757812, "loss": 0.6923, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.01617489382624626, "rewards/margins": 0.030692163854837418, "rewards/rejected": -0.04686705023050308, "step": 8680 }, { "epoch": 0.57, "learning_rate": 2.3382371327981e-06, "logits/chosen": -2.2057578563690186, "logits/rejected": -2.200843334197998, "logps/chosen": -230.00222778320312, "logps/rejected": -225.0457000732422, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.014740320853888988, "rewards/margins": 0.09338275343179703, "rewards/rejected": -0.07864242792129517, "step": 8690 }, { "epoch": 0.57, "learning_rate": 2.3325396148953456e-06, "logits/chosen": -2.073983907699585, "logits/rejected": -2.184781312942505, "logps/chosen": -172.51876831054688, "logps/rejected": -236.9097900390625, "loss": 0.6895, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.012173332273960114, "rewards/margins": 0.10260754823684692, "rewards/rejected": -0.11478088051080704, "step": 8700 }, { "epoch": 0.57, "eval_logits/chosen": -2.2970077991485596, "eval_logits/rejected": -2.1105120182037354, "eval_logps/chosen": -230.84942626953125, "eval_logps/rejected": -218.98678588867188, "eval_loss": 0.6898258328437805, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": 0.011555157601833344, "eval_rewards/margins": 0.0853039100766182, "eval_rewards/rejected": -0.07374875247478485, "eval_runtime": 712.4551, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 8700 }, { "epoch": 0.57, "learning_rate": 2.3268429705479915e-06, "logits/chosen": -2.4747602939605713, "logits/rejected": -2.1206700801849365, "logps/chosen": -222.29537963867188, "logps/rejected": -189.93572998046875, "loss": 0.6905, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.02517220936715603, "rewards/margins": 0.08959371596574783, "rewards/rejected": -0.06442151963710785, "step": 8710 }, { "epoch": 0.57, "learning_rate": 2.3211472294725248e-06, "logits/chosen": -2.3218271732330322, "logits/rejected": -2.1841847896575928, "logps/chosen": -212.7299346923828, "logps/rejected": -209.63003540039062, "loss": 0.6906, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.03685241565108299, "rewards/margins": 0.09567222744226456, "rewards/rejected": -0.058819811791181564, "step": 8720 }, { "epoch": 0.57, "learning_rate": 2.315452421380721e-06, "logits/chosen": -2.2035815715789795, "logits/rejected": -1.7523466348648071, "logps/chosen": -253.510986328125, "logps/rejected": -214.3549346923828, "loss": 0.6887, "rewards/accuracies": 0.625, "rewards/chosen": 0.014333168976008892, "rewards/margins": 0.08837004750967026, "rewards/rejected": -0.07403689622879028, "step": 8730 }, { "epoch": 0.57, "learning_rate": 2.3097585759794886e-06, "logits/chosen": -2.2899675369262695, "logits/rejected": -1.9071069955825806, "logps/chosen": -251.5904541015625, "logps/rejected": -201.7096710205078, "loss": 0.6867, "rewards/accuracies": 0.75, "rewards/chosen": 0.027508467435836792, "rewards/margins": 0.13511213660240173, "rewards/rejected": -0.10760366916656494, "step": 8740 }, { "epoch": 0.57, "learning_rate": 2.3040657229707155e-06, "logits/chosen": -2.304961681365967, "logits/rejected": -2.1966376304626465, "logps/chosen": -170.78793334960938, "logps/rejected": -190.66493225097656, "loss": 0.69, "rewards/accuracies": 0.625, "rewards/chosen": 0.012596605345606804, "rewards/margins": 0.08321347087621689, "rewards/rejected": -0.07061685621738434, "step": 8750 }, { "epoch": 0.57, "learning_rate": 2.2983738920511104e-06, "logits/chosen": -2.464939832687378, "logits/rejected": -1.987932801246643, "logps/chosen": -265.67718505859375, "logps/rejected": -223.0201416015625, "loss": 0.6913, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.04115743562579155, "rewards/margins": 0.07645048201084137, "rewards/rejected": -0.03529305011034012, "step": 8760 }, { "epoch": 0.57, "learning_rate": 2.2926831129120523e-06, "logits/chosen": -2.120628833770752, "logits/rejected": -2.073472499847412, "logps/chosen": -232.22799682617188, "logps/rejected": -209.72720336914062, "loss": 0.6919, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03563634306192398, "rewards/margins": 0.0585489384829998, "rewards/rejected": -0.02291259728372097, "step": 8770 }, { "epoch": 0.57, "learning_rate": 2.2869934152394323e-06, "logits/chosen": -2.321106195449829, "logits/rejected": -2.0543007850646973, "logps/chosen": -268.8101501464844, "logps/rejected": -220.52853393554688, "loss": 0.6887, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0015528525691479445, "rewards/margins": 0.07320442795753479, "rewards/rejected": -0.07165157794952393, "step": 8780 }, { "epoch": 0.58, "learning_rate": 2.281304828713501e-06, "logits/chosen": -2.2122366428375244, "logits/rejected": -2.1436047554016113, "logps/chosen": -231.771728515625, "logps/rejected": -231.8656768798828, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.012155646458268166, "rewards/margins": 0.07596425712108612, "rewards/rejected": -0.0638086199760437, "step": 8790 }, { "epoch": 0.58, "learning_rate": 2.275617383008711e-06, "logits/chosen": -2.3027684688568115, "logits/rejected": -2.2025887966156006, "logps/chosen": -235.1232452392578, "logps/rejected": -237.69161987304688, "loss": 0.6913, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.003641972318291664, "rewards/margins": 0.05236151069402695, "rewards/rejected": -0.048719536513090134, "step": 8800 }, { "epoch": 0.58, "eval_logits/chosen": -2.304422616958618, "eval_logits/rejected": -2.117183208465576, "eval_logps/chosen": -229.04266357421875, "eval_logps/rejected": -216.80625915527344, "eval_loss": 0.6898345351219177, "eval_rewards/accuracies": 0.6464999914169312, "eval_rewards/chosen": 0.029622970148921013, "eval_rewards/margins": 0.0815664604306221, "eval_rewards/rejected": -0.05194348469376564, "eval_runtime": 711.6478, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 8800 }, { "epoch": 0.58, "learning_rate": 2.269931107793567e-06, "logits/chosen": -2.2280020713806152, "logits/rejected": -2.136003255844116, "logps/chosen": -208.7579803466797, "logps/rejected": -222.9391326904297, "loss": 0.6908, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.035223446786403656, "rewards/margins": 0.060886919498443604, "rewards/rejected": -0.02566346526145935, "step": 8810 }, { "epoch": 0.58, "learning_rate": 2.2642460327304655e-06, "logits/chosen": -2.1614763736724854, "logits/rejected": -2.2132813930511475, "logps/chosen": -240.7371063232422, "logps/rejected": -232.02880859375, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.038226418197155, "rewards/margins": 0.07139203697443008, "rewards/rejected": -0.03316562622785568, "step": 8820 }, { "epoch": 0.58, "learning_rate": 2.258562187475543e-06, "logits/chosen": -2.15134859085083, "logits/rejected": -2.1060047149658203, "logps/chosen": -225.7406005859375, "logps/rejected": -195.5087890625, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.012063100934028625, "rewards/margins": 0.07280053198337555, "rewards/rejected": -0.060737431049346924, "step": 8830 }, { "epoch": 0.58, "learning_rate": 2.2528796016785196e-06, "logits/chosen": -2.197204828262329, "logits/rejected": -2.018406391143799, "logps/chosen": -187.3249053955078, "logps/rejected": -208.0103759765625, "loss": 0.6881, "rewards/accuracies": 0.625, "rewards/chosen": 0.02593032643198967, "rewards/margins": 0.11019430309534073, "rewards/rejected": -0.08426396548748016, "step": 8840 }, { "epoch": 0.58, "learning_rate": 2.247198304982548e-06, "logits/chosen": -2.239647388458252, "logits/rejected": -2.0400216579437256, "logps/chosen": -159.90098571777344, "logps/rejected": -163.93194580078125, "loss": 0.6902, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.020705249160528183, "rewards/margins": 0.07980392873287201, "rewards/rejected": -0.05909866839647293, "step": 8850 }, { "epoch": 0.58, "learning_rate": 2.2415183270240533e-06, "logits/chosen": -2.512545108795166, "logits/rejected": -2.268498182296753, "logps/chosen": -197.340087890625, "logps/rejected": -210.59591674804688, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01989520527422428, "rewards/margins": 0.08950191736221313, "rewards/rejected": -0.0696067214012146, "step": 8860 }, { "epoch": 0.58, "learning_rate": 2.2358396974325837e-06, "logits/chosen": -2.317462682723999, "logits/rejected": -2.1121644973754883, "logps/chosen": -239.705322265625, "logps/rejected": -221.26022338867188, "loss": 0.6881, "rewards/accuracies": 0.625, "rewards/chosen": 0.025622522458434105, "rewards/margins": 0.07821951061487198, "rewards/rejected": -0.05259697511792183, "step": 8870 }, { "epoch": 0.58, "learning_rate": 2.2301624458306525e-06, "logits/chosen": -2.4108285903930664, "logits/rejected": -2.1266798973083496, "logps/chosen": -259.18951416015625, "logps/rejected": -213.99862670898438, "loss": 0.6911, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0015859712148085237, "rewards/margins": 0.061502885073423386, "rewards/rejected": -0.05991692468523979, "step": 8880 }, { "epoch": 0.58, "learning_rate": 2.2244866018335855e-06, "logits/chosen": -2.2686455249786377, "logits/rejected": -2.2580018043518066, "logps/chosen": -215.9559326171875, "logps/rejected": -237.7769012451172, "loss": 0.6912, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.011017450131475925, "rewards/margins": 0.07413921505212784, "rewards/rejected": -0.06312176585197449, "step": 8890 }, { "epoch": 0.58, "learning_rate": 2.2188121950493648e-06, "logits/chosen": -2.4074501991271973, "logits/rejected": -2.0326874256134033, "logps/chosen": -219.9821319580078, "logps/rejected": -146.60345458984375, "loss": 0.6906, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.01238707359880209, "rewards/margins": 0.0487365797162056, "rewards/rejected": -0.06112365052103996, "step": 8900 }, { "epoch": 0.58, "eval_logits/chosen": -2.305030584335327, "eval_logits/rejected": -2.1172640323638916, "eval_logps/chosen": -231.61558532714844, "eval_logps/rejected": -220.36143493652344, "eval_loss": 0.6898223757743835, "eval_rewards/accuracies": 0.6485000252723694, "eval_rewards/chosen": 0.003893795656040311, "eval_rewards/margins": 0.09138916432857513, "eval_rewards/rejected": -0.08749537914991379, "eval_runtime": 712.4849, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 8900 }, { "epoch": 0.58, "learning_rate": 2.2131392550784766e-06, "logits/chosen": -2.4283366203308105, "logits/rejected": -1.8540500402450562, "logps/chosen": -285.9861145019531, "logps/rejected": -198.90310668945312, "loss": 0.6923, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.011074522510170937, "rewards/margins": 0.08481260389089584, "rewards/rejected": -0.09588713943958282, "step": 8910 }, { "epoch": 0.58, "learning_rate": 2.2074678115137533e-06, "logits/chosen": -2.1023287773132324, "logits/rejected": -2.0058627128601074, "logps/chosen": -195.75587463378906, "logps/rejected": -213.19921875, "loss": 0.6869, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.010389198549091816, "rewards/margins": 0.12526783347129822, "rewards/rejected": -0.13565704226493835, "step": 8920 }, { "epoch": 0.58, "learning_rate": 2.201797893940224e-06, "logits/chosen": -2.190784454345703, "logits/rejected": -1.9884440898895264, "logps/chosen": -232.9307403564453, "logps/rejected": -261.10540771484375, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0024665123783051968, "rewards/margins": 0.07824081182479858, "rewards/rejected": -0.07577430456876755, "step": 8930 }, { "epoch": 0.58, "learning_rate": 2.196129531934956e-06, "logits/chosen": -2.2389838695526123, "logits/rejected": -1.970949411392212, "logps/chosen": -235.01522827148438, "logps/rejected": -226.91268920898438, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02898770570755005, "rewards/margins": 0.09960027039051056, "rewards/rejected": -0.07061255723237991, "step": 8940 }, { "epoch": 0.59, "learning_rate": 2.190462755066902e-06, "logits/chosen": -2.251969814300537, "logits/rejected": -2.020610809326172, "logps/chosen": -263.11944580078125, "logps/rejected": -244.75009155273438, "loss": 0.6924, "rewards/accuracies": 0.75, "rewards/chosen": -0.007182478904724121, "rewards/margins": 0.07121424376964569, "rewards/rejected": -0.07839672267436981, "step": 8950 }, { "epoch": 0.59, "learning_rate": 2.184797592896746e-06, "logits/chosen": -2.379193067550659, "logits/rejected": -2.3389806747436523, "logps/chosen": -233.12423706054688, "logps/rejected": -215.5608673095703, "loss": 0.6887, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.01311335526406765, "rewards/margins": 0.08969788253307343, "rewards/rejected": -0.07658452540636063, "step": 8960 }, { "epoch": 0.59, "learning_rate": 2.17913407497675e-06, "logits/chosen": -2.313098430633545, "logits/rejected": -2.381880283355713, "logps/chosen": -176.4674072265625, "logps/rejected": -218.6103973388672, "loss": 0.6915, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.019145773723721504, "rewards/margins": 0.0819178968667984, "rewards/rejected": -0.06277212500572205, "step": 8970 }, { "epoch": 0.59, "learning_rate": 2.173472230850596e-06, "logits/chosen": -2.4210402965545654, "logits/rejected": -2.2281031608581543, "logps/chosen": -201.3446044921875, "logps/rejected": -165.3816680908203, "loss": 0.6918, "rewards/accuracies": 0.5, "rewards/chosen": -0.010820412077009678, "rewards/margins": 0.04480786249041557, "rewards/rejected": -0.05562828108668327, "step": 8980 }, { "epoch": 0.59, "learning_rate": 2.1678120900532375e-06, "logits/chosen": -2.4019229412078857, "logits/rejected": -2.1054940223693848, "logps/chosen": -235.7376251220703, "logps/rejected": -216.53738403320312, "loss": 0.691, "rewards/accuracies": 0.75, "rewards/chosen": 0.0017721873009577394, "rewards/margins": 0.10060055553913116, "rewards/rejected": -0.09882837533950806, "step": 8990 }, { "epoch": 0.59, "learning_rate": 2.1621536821107412e-06, "logits/chosen": -2.2904767990112305, "logits/rejected": -2.159829616546631, "logps/chosen": -201.26571655273438, "logps/rejected": -169.5950164794922, "loss": 0.6888, "rewards/accuracies": 0.625, "rewards/chosen": 0.013041026890277863, "rewards/margins": 0.103615902364254, "rewards/rejected": -0.09057489037513733, "step": 9000 }, { "epoch": 0.59, "eval_logits/chosen": -2.3073229789733887, "eval_logits/rejected": -2.1196234226226807, "eval_logps/chosen": -230.892333984375, "eval_logps/rejected": -219.00497436523438, "eval_loss": 0.6897886991500854, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": 0.011126398108899593, "eval_rewards/margins": 0.08505717664957047, "eval_rewards/rejected": -0.0739307776093483, "eval_runtime": 715.638, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 9000 }, { "epoch": 0.59, "learning_rate": 2.1564970365401346e-06, "logits/chosen": -2.342783212661743, "logits/rejected": -2.0789589881896973, "logps/chosen": -183.83070373535156, "logps/rejected": -154.10012817382812, "loss": 0.6886, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0032037317287176847, "rewards/margins": 0.08277516812086105, "rewards/rejected": -0.0795714408159256, "step": 9010 }, { "epoch": 0.59, "learning_rate": 2.1508421828492527e-06, "logits/chosen": -2.4671521186828613, "logits/rejected": -2.1197142601013184, "logps/chosen": -222.3729705810547, "logps/rejected": -161.41038513183594, "loss": 0.6922, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.02819722518324852, "rewards/margins": 0.07277707755565643, "rewards/rejected": -0.04457986354827881, "step": 9020 }, { "epoch": 0.59, "learning_rate": 2.145189150536582e-06, "logits/chosen": -2.1406853199005127, "logits/rejected": -2.043962001800537, "logps/chosen": -219.89431762695312, "logps/rejected": -176.14846801757812, "loss": 0.6914, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.02275204285979271, "rewards/margins": 0.05656403303146362, "rewards/rejected": -0.03381199389696121, "step": 9030 }, { "epoch": 0.59, "learning_rate": 2.139537969091107e-06, "logits/chosen": -2.1763834953308105, "logits/rejected": -2.141878843307495, "logps/chosen": -265.5818786621094, "logps/rejected": -207.55709838867188, "loss": 0.693, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 1.7970241970033385e-05, "rewards/margins": 0.0414576455950737, "rewards/rejected": -0.04143967479467392, "step": 9040 }, { "epoch": 0.59, "learning_rate": 2.1338886679921603e-06, "logits/chosen": -2.2526087760925293, "logits/rejected": -2.184154748916626, "logps/chosen": -240.57388305664062, "logps/rejected": -225.2537841796875, "loss": 0.6912, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.009691650979220867, "rewards/margins": 0.05090288445353508, "rewards/rejected": -0.041211239993572235, "step": 9050 }, { "epoch": 0.59, "learning_rate": 2.128241276709263e-06, "logits/chosen": -2.3358287811279297, "logits/rejected": -2.2810444831848145, "logps/chosen": -199.28738403320312, "logps/rejected": -226.06655883789062, "loss": 0.6915, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.039743922650814056, "rewards/margins": 0.06741781532764435, "rewards/rejected": -0.027673888951539993, "step": 9060 }, { "epoch": 0.59, "learning_rate": 2.1225958247019746e-06, "logits/chosen": -2.3715434074401855, "logits/rejected": -2.5027832984924316, "logps/chosen": -184.3772430419922, "logps/rejected": -209.1215362548828, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.027645772323012352, "rewards/margins": 0.046046603471040726, "rewards/rejected": -0.018400834873318672, "step": 9070 }, { "epoch": 0.59, "learning_rate": 2.1169523414197383e-06, "logits/chosen": -2.1513938903808594, "logits/rejected": -2.152141571044922, "logps/chosen": -196.04464721679688, "logps/rejected": -224.1463165283203, "loss": 0.6907, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.027610694989562035, "rewards/margins": 0.04780023545026779, "rewards/rejected": -0.020189542323350906, "step": 9080 }, { "epoch": 0.59, "learning_rate": 2.1113108563017267e-06, "logits/chosen": -2.248032808303833, "logits/rejected": -2.033977746963501, "logps/chosen": -212.1124725341797, "logps/rejected": -188.57859802246094, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": 0.006707633379846811, "rewards/margins": 0.07095328718423843, "rewards/rejected": -0.06424565613269806, "step": 9090 }, { "epoch": 0.6, "learning_rate": 2.1056713987766905e-06, "logits/chosen": -2.443134307861328, "logits/rejected": -2.1070022583007812, "logps/chosen": -212.5425262451172, "logps/rejected": -172.75057983398438, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.02657800354063511, "rewards/margins": 0.08923501521348953, "rewards/rejected": -0.06265701353549957, "step": 9100 }, { "epoch": 0.6, "eval_logits/chosen": -2.312913179397583, "eval_logits/rejected": -2.1251325607299805, "eval_logps/chosen": -229.99118041992188, "eval_logps/rejected": -216.90176391601562, "eval_loss": 0.6898962259292603, "eval_rewards/accuracies": 0.6324999928474426, "eval_rewards/chosen": 0.02013748697936535, "eval_rewards/margins": 0.07303596287965775, "eval_rewards/rejected": -0.05289847403764725, "eval_runtime": 710.189, "eval_samples_per_second": 2.816, "eval_steps_per_second": 1.408, "step": 9100 }, { "epoch": 0.6, "learning_rate": 2.1000339982628022e-06, "logits/chosen": -2.1159980297088623, "logits/rejected": -2.1975278854370117, "logps/chosen": -249.4115447998047, "logps/rejected": -221.4599609375, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.01861291006207466, "rewards/margins": 0.06368504464626312, "rewards/rejected": -0.04507213830947876, "step": 9110 }, { "epoch": 0.6, "learning_rate": 2.0943986841675043e-06, "logits/chosen": -2.3297810554504395, "logits/rejected": -2.0988831520080566, "logps/chosen": -199.1885986328125, "logps/rejected": -190.6521453857422, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": 0.04022395610809326, "rewards/margins": 0.10803340375423431, "rewards/rejected": -0.06780944764614105, "step": 9120 }, { "epoch": 0.6, "learning_rate": 2.088765485887356e-06, "logits/chosen": -2.3123505115509033, "logits/rejected": -2.110137939453125, "logps/chosen": -242.97427368164062, "logps/rejected": -205.7440185546875, "loss": 0.6919, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.01740623079240322, "rewards/margins": 0.04395180940628052, "rewards/rejected": -0.026545578613877296, "step": 9130 }, { "epoch": 0.6, "learning_rate": 2.083134432807879e-06, "logits/chosen": -2.2417685985565186, "logits/rejected": -2.172234058380127, "logps/chosen": -193.64578247070312, "logps/rejected": -223.5891876220703, "loss": 0.6891, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006263398565351963, "rewards/margins": 0.08425328880548477, "rewards/rejected": -0.07798988372087479, "step": 9140 }, { "epoch": 0.6, "learning_rate": 2.077505554303404e-06, "logits/chosen": -2.3099794387817383, "logits/rejected": -2.274794816970825, "logps/chosen": -169.19174194335938, "logps/rejected": -179.0936279296875, "loss": 0.6903, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.053602445870637894, "rewards/margins": 0.05535256117582321, "rewards/rejected": -0.0017501137917861342, "step": 9150 }, { "epoch": 0.6, "learning_rate": 2.071878879736918e-06, "logits/chosen": -2.3148703575134277, "logits/rejected": -2.1224913597106934, "logps/chosen": -245.26870727539062, "logps/rejected": -331.78924560546875, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": 0.01570909097790718, "rewards/margins": 0.08236773312091827, "rewards/rejected": -0.06665865331888199, "step": 9160 }, { "epoch": 0.6, "learning_rate": 2.0662544384599136e-06, "logits/chosen": -2.2138946056365967, "logits/rejected": -2.138765335083008, "logps/chosen": -197.99423217773438, "logps/rejected": -190.30654907226562, "loss": 0.6893, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.04112407937645912, "rewards/margins": 0.08797650039196014, "rewards/rejected": -0.046852417290210724, "step": 9170 }, { "epoch": 0.6, "learning_rate": 2.0606322598122314e-06, "logits/chosen": -2.2186291217803955, "logits/rejected": -2.3340086936950684, "logps/chosen": -185.21937561035156, "logps/rejected": -208.082763671875, "loss": 0.6924, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.012941636145114899, "rewards/margins": 0.03202500194311142, "rewards/rejected": -0.019083363935351372, "step": 9180 }, { "epoch": 0.6, "learning_rate": 2.0550123731219085e-06, "logits/chosen": -2.4984421730041504, "logits/rejected": -2.322842836380005, "logps/chosen": -249.52920532226562, "logps/rejected": -215.18814086914062, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.036506522446870804, "rewards/margins": 0.06803809106349945, "rewards/rejected": -0.03153156489133835, "step": 9190 }, { "epoch": 0.6, "learning_rate": 2.0493948077050267e-06, "logits/chosen": -2.162285327911377, "logits/rejected": -1.980200171470642, "logps/chosen": -190.65975952148438, "logps/rejected": -181.0004425048828, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.025962283834815025, "rewards/margins": 0.08430268615484238, "rewards/rejected": -0.0583404079079628, "step": 9200 }, { "epoch": 0.6, "eval_logits/chosen": -2.328345537185669, "eval_logits/rejected": -2.139697551727295, "eval_logps/chosen": -229.9346923828125, "eval_logps/rejected": -217.44418334960938, "eval_loss": 0.6898381114006042, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": 0.020702635869383812, "eval_rewards/margins": 0.07902555167675018, "eval_rewards/rejected": -0.05832291021943092, "eval_runtime": 713.3551, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 9200 }, { "epoch": 0.6, "learning_rate": 2.0437795928655596e-06, "logits/chosen": -2.351074457168579, "logits/rejected": -2.3267416954040527, "logps/chosen": -279.7415466308594, "logps/rejected": -267.31201171875, "loss": 0.6906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.019710825756192207, "rewards/margins": 0.05754191428422928, "rewards/rejected": -0.03783109039068222, "step": 9210 }, { "epoch": 0.6, "learning_rate": 2.0381667578952184e-06, "logits/chosen": -2.4281165599823, "logits/rejected": -2.208249568939209, "logps/chosen": -209.62942504882812, "logps/rejected": -225.8242645263672, "loss": 0.6882, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.012434298172593117, "rewards/margins": 0.10577799379825592, "rewards/rejected": -0.09334369748830795, "step": 9220 }, { "epoch": 0.6, "learning_rate": 2.0325563320732995e-06, "logits/chosen": -2.4986178874969482, "logits/rejected": -2.109630584716797, "logps/chosen": -262.3493347167969, "logps/rejected": -227.225341796875, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.009889942593872547, "rewards/margins": 0.08893869817256927, "rewards/rejected": -0.0790487602353096, "step": 9230 }, { "epoch": 0.6, "learning_rate": 2.026948344666532e-06, "logits/chosen": -2.2422378063201904, "logits/rejected": -2.198185443878174, "logps/chosen": -199.4209747314453, "logps/rejected": -207.3389434814453, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.002950438065454364, "rewards/margins": 0.083625927567482, "rewards/rejected": -0.08657635748386383, "step": 9240 }, { "epoch": 0.61, "learning_rate": 2.0213428249289257e-06, "logits/chosen": -2.2211735248565674, "logits/rejected": -2.1415927410125732, "logps/chosen": -196.4319610595703, "logps/rejected": -207.9313507080078, "loss": 0.6879, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.021085821092128754, "rewards/margins": 0.09691883623600006, "rewards/rejected": -0.0758330225944519, "step": 9250 }, { "epoch": 0.61, "learning_rate": 2.0157398021016175e-06, "logits/chosen": -2.2351865768432617, "logits/rejected": -2.1974527835845947, "logps/chosen": -144.62660217285156, "logps/rejected": -199.48924255371094, "loss": 0.691, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.02908751741051674, "rewards/margins": 0.08657882362604141, "rewards/rejected": -0.05749132111668587, "step": 9260 }, { "epoch": 0.61, "learning_rate": 2.010139305412719e-06, "logits/chosen": -2.506202220916748, "logits/rejected": -2.2831943035125732, "logps/chosen": -275.13079833984375, "logps/rejected": -239.76907348632812, "loss": 0.6913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.014648482203483582, "rewards/margins": 0.07002347707748413, "rewards/rejected": -0.05537499859929085, "step": 9270 }, { "epoch": 0.61, "learning_rate": 2.0045413640771644e-06, "logits/chosen": -2.2399230003356934, "logits/rejected": -2.382997989654541, "logps/chosen": -254.0187530517578, "logps/rejected": -266.322021484375, "loss": 0.6881, "rewards/accuracies": 0.625, "rewards/chosen": 0.01146581582725048, "rewards/margins": 0.09047718346118927, "rewards/rejected": -0.07901137322187424, "step": 9280 }, { "epoch": 0.61, "learning_rate": 1.998946007296558e-06, "logits/chosen": -2.476470708847046, "logits/rejected": -2.1356618404388428, "logps/chosen": -306.3233947753906, "logps/rejected": -251.1251678466797, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02277219668030739, "rewards/margins": 0.0823870599269867, "rewards/rejected": -0.059614866971969604, "step": 9290 }, { "epoch": 0.61, "learning_rate": 1.9933532642590215e-06, "logits/chosen": -2.212050676345825, "logits/rejected": -1.8353458642959595, "logps/chosen": -184.3854522705078, "logps/rejected": -143.99754333496094, "loss": 0.6899, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.04772832244634628, "rewards/margins": 0.09772919118404388, "rewards/rejected": -0.0500008650124073, "step": 9300 }, { "epoch": 0.61, "eval_logits/chosen": -2.333347797393799, "eval_logits/rejected": -2.1441400051116943, "eval_logps/chosen": -231.38299560546875, "eval_logps/rejected": -219.56932067871094, "eval_loss": 0.689826488494873, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": 0.006219496019184589, "eval_rewards/margins": 0.08579385280609131, "eval_rewards/rejected": -0.079574353992939, "eval_runtime": 709.6682, "eval_samples_per_second": 2.818, "eval_steps_per_second": 1.409, "step": 9300 }, { "epoch": 0.61, "learning_rate": 1.987763164139042e-06, "logits/chosen": -2.3631813526153564, "logits/rejected": -2.159517288208008, "logps/chosen": -204.81552124023438, "logps/rejected": -214.0128631591797, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.004395435098558664, "rewards/margins": 0.06943001598119736, "rewards/rejected": -0.06503458321094513, "step": 9310 }, { "epoch": 0.61, "learning_rate": 1.982175736097321e-06, "logits/chosen": -2.047428846359253, "logits/rejected": -2.0620810985565186, "logps/chosen": -275.38446044921875, "logps/rejected": -301.35516357421875, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.008434224873781204, "rewards/margins": 0.08950887620449066, "rewards/rejected": -0.09794311225414276, "step": 9320 }, { "epoch": 0.61, "learning_rate": 1.9765910092806196e-06, "logits/chosen": -2.2493948936462402, "logits/rejected": -2.153007984161377, "logps/chosen": -177.16024780273438, "logps/rejected": -163.21905517578125, "loss": 0.6901, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.009152812883257866, "rewards/margins": 0.05717051774263382, "rewards/rejected": -0.048017702996730804, "step": 9330 }, { "epoch": 0.61, "learning_rate": 1.9710090128216083e-06, "logits/chosen": -2.3084399700164795, "logits/rejected": -2.213273525238037, "logps/chosen": -224.1075897216797, "logps/rejected": -218.4535675048828, "loss": 0.6883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.013775345869362354, "rewards/margins": 0.10509101301431656, "rewards/rejected": -0.11886636167764664, "step": 9340 }, { "epoch": 0.61, "learning_rate": 1.9654297758387155e-06, "logits/chosen": -2.1536035537719727, "logits/rejected": -2.131392002105713, "logps/chosen": -163.9993133544922, "logps/rejected": -192.57957458496094, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": -0.031292807310819626, "rewards/margins": 0.07854814827442169, "rewards/rejected": -0.10984095185995102, "step": 9350 }, { "epoch": 0.61, "learning_rate": 1.9598533274359736e-06, "logits/chosen": -2.3260645866394043, "logits/rejected": -2.233445882797241, "logps/chosen": -243.11318969726562, "logps/rejected": -253.74349975585938, "loss": 0.6924, "rewards/accuracies": 0.5, "rewards/chosen": -0.03432890772819519, "rewards/margins": 0.03268744423985481, "rewards/rejected": -0.0670163482427597, "step": 9360 }, { "epoch": 0.61, "learning_rate": 1.9542796967028697e-06, "logits/chosen": -2.334592819213867, "logits/rejected": -2.218923330307007, "logps/chosen": -219.28659057617188, "logps/rejected": -207.3817596435547, "loss": 0.6917, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.01046024076640606, "rewards/margins": 0.0633801594376564, "rewards/rejected": -0.07384039461612701, "step": 9370 }, { "epoch": 0.61, "learning_rate": 1.948708912714192e-06, "logits/chosen": -2.243697166442871, "logits/rejected": -2.0024325847625732, "logps/chosen": -252.65280151367188, "logps/rejected": -229.02334594726562, "loss": 0.692, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05707541108131409, "rewards/margins": 0.07306285202503204, "rewards/rejected": -0.13013826310634613, "step": 9380 }, { "epoch": 0.61, "learning_rate": 1.9431410045298786e-06, "logits/chosen": -2.0937747955322266, "logits/rejected": -2.023585796356201, "logps/chosen": -219.1824951171875, "logps/rejected": -225.77780151367188, "loss": 0.6898, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.020571475848555565, "rewards/margins": 0.07571020722389221, "rewards/rejected": -0.09628168493509293, "step": 9390 }, { "epoch": 0.62, "learning_rate": 1.9375760011948654e-06, "logits/chosen": -2.4148128032684326, "logits/rejected": -2.2704997062683105, "logps/chosen": -202.109130859375, "logps/rejected": -234.7781982421875, "loss": 0.6884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.024311980232596397, "rewards/margins": 0.10348609834909439, "rewards/rejected": -0.12779806554317474, "step": 9400 }, { "epoch": 0.62, "eval_logits/chosen": -2.3321480751037598, "eval_logits/rejected": -2.1431541442871094, "eval_logps/chosen": -234.85800170898438, "eval_logps/rejected": -222.5006866455078, "eval_loss": 0.6898515224456787, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": -0.028530515730381012, "eval_rewards/margins": 0.08035717159509659, "eval_rewards/rejected": -0.1088876873254776, "eval_runtime": 710.5474, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 9400 }, { "epoch": 0.62, "learning_rate": 1.932013931738937e-06, "logits/chosen": -2.310518741607666, "logits/rejected": -2.0845718383789062, "logps/chosen": -207.1325225830078, "logps/rejected": -232.1420135498047, "loss": 0.6856, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03530799597501755, "rewards/margins": 0.12127707153558731, "rewards/rejected": -0.15658505260944366, "step": 9410 }, { "epoch": 0.62, "learning_rate": 1.9264548251765717e-06, "logits/chosen": -2.426779270172119, "logits/rejected": -2.238455295562744, "logps/chosen": -205.3507080078125, "logps/rejected": -209.31491088867188, "loss": 0.6911, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.015480468980967999, "rewards/margins": 0.07075979560613632, "rewards/rejected": -0.0862402692437172, "step": 9420 }, { "epoch": 0.62, "learning_rate": 1.9208987105067924e-06, "logits/chosen": -2.2212510108947754, "logits/rejected": -2.0826263427734375, "logps/chosen": -216.4222412109375, "logps/rejected": -198.42672729492188, "loss": 0.6914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.01865190640091896, "rewards/margins": 0.058921001851558685, "rewards/rejected": -0.07757291197776794, "step": 9430 }, { "epoch": 0.62, "learning_rate": 1.9153456167130154e-06, "logits/chosen": -2.3300156593322754, "logits/rejected": -2.324368715286255, "logps/chosen": -206.3992156982422, "logps/rejected": -240.15884399414062, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.023285966366529465, "rewards/margins": 0.0756094753742218, "rewards/rejected": -0.09889544546604156, "step": 9440 }, { "epoch": 0.62, "learning_rate": 1.9097955727628975e-06, "logits/chosen": -2.3564929962158203, "logits/rejected": -2.353801965713501, "logps/chosen": -196.1259765625, "logps/rejected": -216.0775604248047, "loss": 0.6901, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0014045886928215623, "rewards/margins": 0.0801662728190422, "rewards/rejected": -0.08157085627317429, "step": 9450 }, { "epoch": 0.62, "learning_rate": 1.904248607608187e-06, "logits/chosen": -2.2641825675964355, "logits/rejected": -2.2951583862304688, "logps/chosen": -257.591552734375, "logps/rejected": -216.25, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.003715710248798132, "rewards/margins": 0.05765196681022644, "rewards/rejected": -0.06136767938733101, "step": 9460 }, { "epoch": 0.62, "learning_rate": 1.8987047501845714e-06, "logits/chosen": -2.3341257572174072, "logits/rejected": -2.302320957183838, "logps/chosen": -166.62255859375, "logps/rejected": -169.2261199951172, "loss": 0.6898, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0030190914403647184, "rewards/margins": 0.08849085867404938, "rewards/rejected": -0.08547177165746689, "step": 9470 }, { "epoch": 0.62, "learning_rate": 1.8931640294115267e-06, "logits/chosen": -2.1365644931793213, "logits/rejected": -2.0182435512542725, "logps/chosen": -193.88600158691406, "logps/rejected": -189.7248992919922, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0011992065701633692, "rewards/margins": 0.11369111388921738, "rewards/rejected": -0.11249189078807831, "step": 9480 }, { "epoch": 0.62, "learning_rate": 1.8876264741921662e-06, "logits/chosen": -2.102898120880127, "logits/rejected": -2.123107433319092, "logps/chosen": -189.058837890625, "logps/rejected": -195.1671142578125, "loss": 0.6866, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.01066638994961977, "rewards/margins": 0.11543774604797363, "rewards/rejected": -0.10477133840322495, "step": 9490 }, { "epoch": 0.62, "learning_rate": 1.8820921134130912e-06, "logits/chosen": -2.3311290740966797, "logits/rejected": -1.958857774734497, "logps/chosen": -230.97128295898438, "logps/rejected": -198.6006317138672, "loss": 0.6871, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.004591288510710001, "rewards/margins": 0.12538619339466095, "rewards/rejected": -0.1299774944782257, "step": 9500 }, { "epoch": 0.62, "eval_logits/chosen": -2.332388162612915, "eval_logits/rejected": -2.143458604812622, "eval_logps/chosen": -232.95985412597656, "eval_logps/rejected": -220.78396606445312, "eval_loss": 0.6898452639579773, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.009548979811370373, "eval_rewards/margins": 0.08217175304889679, "eval_rewards/rejected": -0.09172075241804123, "eval_runtime": 713.2212, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 9500 }, { "epoch": 0.62, "learning_rate": 1.8765609759442378e-06, "logits/chosen": -2.1907570362091064, "logits/rejected": -2.0811009407043457, "logps/chosen": -242.083251953125, "logps/rejected": -238.7690887451172, "loss": 0.6899, "rewards/accuracies": 0.75, "rewards/chosen": -0.0076196156442165375, "rewards/margins": 0.07497996836900711, "rewards/rejected": -0.08259958773851395, "step": 9510 }, { "epoch": 0.62, "learning_rate": 1.8710330906387288e-06, "logits/chosen": -2.3943378925323486, "logits/rejected": -2.34335994720459, "logps/chosen": -238.8350372314453, "logps/rejected": -271.54937744140625, "loss": 0.6896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.003474016208201647, "rewards/margins": 0.09124413132667542, "rewards/rejected": -0.09471814334392548, "step": 9520 }, { "epoch": 0.62, "learning_rate": 1.8655084863327222e-06, "logits/chosen": -2.304542303085327, "logits/rejected": -2.324296474456787, "logps/chosen": -183.95468139648438, "logps/rejected": -195.6629180908203, "loss": 0.6922, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.004966600798070431, "rewards/margins": 0.055461425334215164, "rewards/rejected": -0.060428015887737274, "step": 9530 }, { "epoch": 0.62, "learning_rate": 1.8599871918452603e-06, "logits/chosen": -2.1602072715759277, "logits/rejected": -2.1528382301330566, "logps/chosen": -221.7826690673828, "logps/rejected": -245.52969360351562, "loss": 0.6904, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.01309148408472538, "rewards/margins": 0.10497549921274185, "rewards/rejected": -0.09188400954008102, "step": 9540 }, { "epoch": 0.62, "learning_rate": 1.8544692359781192e-06, "logits/chosen": -2.3558590412139893, "logits/rejected": -2.1197030544281006, "logps/chosen": -186.4560546875, "logps/rejected": -162.83966064453125, "loss": 0.6908, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.011286085471510887, "rewards/margins": 0.07330868393182755, "rewards/rejected": -0.062022604048252106, "step": 9550 }, { "epoch": 0.63, "learning_rate": 1.8489546475156602e-06, "logits/chosen": -2.5130443572998047, "logits/rejected": -2.286351442337036, "logps/chosen": -218.3134765625, "logps/rejected": -207.4962615966797, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": 0.011793679557740688, "rewards/margins": 0.0735076516866684, "rewards/rejected": -0.06171398237347603, "step": 9560 }, { "epoch": 0.63, "learning_rate": 1.8434434552246778e-06, "logits/chosen": -2.1478981971740723, "logits/rejected": -2.0774495601654053, "logps/chosen": -210.42529296875, "logps/rejected": -207.1792755126953, "loss": 0.6895, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.007937637157738209, "rewards/margins": 0.08016934990882874, "rewards/rejected": -0.08810698240995407, "step": 9570 }, { "epoch": 0.63, "learning_rate": 1.837935687854251e-06, "logits/chosen": -2.3582987785339355, "logits/rejected": -2.111501693725586, "logps/chosen": -219.9694366455078, "logps/rejected": -197.4185333251953, "loss": 0.6883, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.015772990882396698, "rewards/margins": 0.09249218553304672, "rewards/rejected": -0.07671918720006943, "step": 9580 }, { "epoch": 0.63, "learning_rate": 1.832431374135592e-06, "logits/chosen": -2.4764034748077393, "logits/rejected": -2.07975697517395, "logps/chosen": -249.00634765625, "logps/rejected": -241.4837188720703, "loss": 0.687, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.005709916818886995, "rewards/margins": 0.11350512504577637, "rewards/rejected": -0.11921503394842148, "step": 9590 }, { "epoch": 0.63, "learning_rate": 1.8269305427818977e-06, "logits/chosen": -2.4727559089660645, "logits/rejected": -2.327918529510498, "logps/chosen": -208.00125122070312, "logps/rejected": -186.8267059326172, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.006150106433779001, "rewards/margins": 0.06489264965057373, "rewards/rejected": -0.05874254181981087, "step": 9600 }, { "epoch": 0.63, "eval_logits/chosen": -2.3416812419891357, "eval_logits/rejected": -2.1520164012908936, "eval_logps/chosen": -229.97618103027344, "eval_logps/rejected": -218.22509765625, "eval_loss": 0.6898518204689026, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": 0.02028742991387844, "eval_rewards/margins": 0.08641922473907471, "eval_rewards/rejected": -0.06613180041313171, "eval_runtime": 712.8227, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 9600 }, { "epoch": 0.63, "learning_rate": 1.821433222488199e-06, "logits/chosen": -2.339639663696289, "logits/rejected": -2.0263071060180664, "logps/chosen": -221.83694458007812, "logps/rejected": -203.28822326660156, "loss": 0.6891, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.037778157740831375, "rewards/margins": 0.09198880195617676, "rewards/rejected": -0.05421064421534538, "step": 9610 }, { "epoch": 0.63, "learning_rate": 1.8159394419312112e-06, "logits/chosen": -2.377436399459839, "logits/rejected": -2.1873252391815186, "logps/chosen": -256.22357177734375, "logps/rejected": -222.9322967529297, "loss": 0.6871, "rewards/accuracies": 0.75, "rewards/chosen": 0.025064552202820778, "rewards/margins": 0.13247425854206085, "rewards/rejected": -0.10740969330072403, "step": 9620 }, { "epoch": 0.63, "learning_rate": 1.8104492297691845e-06, "logits/chosen": -2.3256943225860596, "logits/rejected": -2.104449510574341, "logps/chosen": -228.5015869140625, "logps/rejected": -213.09912109375, "loss": 0.692, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.019784385338425636, "rewards/margins": 0.08710122108459473, "rewards/rejected": -0.10688559710979462, "step": 9630 }, { "epoch": 0.63, "learning_rate": 1.8049626146417562e-06, "logits/chosen": -2.1221089363098145, "logits/rejected": -1.9787824153900146, "logps/chosen": -155.16111755371094, "logps/rejected": -163.43637084960938, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0008523104479536414, "rewards/margins": 0.11988194286823273, "rewards/rejected": -0.12073423713445663, "step": 9640 }, { "epoch": 0.63, "learning_rate": 1.7994796251697983e-06, "logits/chosen": -2.2313790321350098, "logits/rejected": -2.09123158454895, "logps/chosen": -196.5417022705078, "logps/rejected": -248.27761840820312, "loss": 0.6885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04502807930111885, "rewards/margins": 0.12171129137277603, "rewards/rejected": -0.166739359498024, "step": 9650 }, { "epoch": 0.63, "learning_rate": 1.794000289955269e-06, "logits/chosen": -2.2783684730529785, "logits/rejected": -2.106417417526245, "logps/chosen": -269.64556884765625, "logps/rejected": -246.83901977539062, "loss": 0.6888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.025853093713521957, "rewards/margins": 0.08640275150537491, "rewards/rejected": -0.11225583404302597, "step": 9660 }, { "epoch": 0.63, "learning_rate": 1.7885246375810646e-06, "logits/chosen": -2.2515721321105957, "logits/rejected": -1.9777309894561768, "logps/chosen": -224.28076171875, "logps/rejected": -228.09130859375, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": -6.888061761856079e-05, "rewards/margins": 0.07572519034147263, "rewards/rejected": -0.07579407840967178, "step": 9670 }, { "epoch": 0.63, "learning_rate": 1.7830526966108713e-06, "logits/chosen": -2.1138720512390137, "logits/rejected": -1.9187238216400146, "logps/chosen": -189.40728759765625, "logps/rejected": -175.88150024414062, "loss": 0.6841, "rewards/accuracies": 0.75, "rewards/chosen": -0.029343629255890846, "rewards/margins": 0.15085718035697937, "rewards/rejected": -0.18020080029964447, "step": 9680 }, { "epoch": 0.63, "learning_rate": 1.7775844955890129e-06, "logits/chosen": -2.271742343902588, "logits/rejected": -2.1086041927337646, "logps/chosen": -210.3048095703125, "logps/rejected": -209.052978515625, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.012415561825037003, "rewards/margins": 0.103248730301857, "rewards/rejected": -0.0908331573009491, "step": 9690 }, { "epoch": 0.63, "learning_rate": 1.7721200630403046e-06, "logits/chosen": -2.346208095550537, "logits/rejected": -2.157820224761963, "logps/chosen": -195.90628051757812, "logps/rejected": -228.021240234375, "loss": 0.6895, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0014390780124813318, "rewards/margins": 0.06569032371044159, "rewards/rejected": -0.06712940335273743, "step": 9700 }, { "epoch": 0.63, "eval_logits/chosen": -2.3423051834106445, "eval_logits/rejected": -2.1527085304260254, "eval_logps/chosen": -231.52012634277344, "eval_logps/rejected": -219.43946838378906, "eval_loss": 0.6898437142372131, "eval_rewards/accuracies": 0.6439999938011169, "eval_rewards/chosen": 0.004848138429224491, "eval_rewards/margins": 0.08312396705150604, "eval_rewards/rejected": -0.07827582955360413, "eval_runtime": 710.5463, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 9700 }, { "epoch": 0.64, "learning_rate": 1.7666594274699037e-06, "logits/chosen": -2.264838457107544, "logits/rejected": -2.1133649349212646, "logps/chosen": -250.9988555908203, "logps/rejected": -223.15579223632812, "loss": 0.6889, "rewards/accuracies": 0.75, "rewards/chosen": 0.0169143658131361, "rewards/margins": 0.10361073911190033, "rewards/rejected": -0.08669638633728027, "step": 9710 }, { "epoch": 0.64, "learning_rate": 1.76120261736316e-06, "logits/chosen": -2.314434051513672, "logits/rejected": -1.943964958190918, "logps/chosen": -227.54330444335938, "logps/rejected": -212.75125122070312, "loss": 0.6871, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.00828264094889164, "rewards/margins": 0.12351206690073013, "rewards/rejected": -0.13179472088813782, "step": 9720 }, { "epoch": 0.64, "learning_rate": 1.755749661185468e-06, "logits/chosen": -2.382049322128296, "logits/rejected": -1.948427438735962, "logps/chosen": -290.7059631347656, "logps/rejected": -242.9027557373047, "loss": 0.6899, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.03743572160601616, "rewards/margins": 0.1103050485253334, "rewards/rejected": -0.07286933809518814, "step": 9730 }, { "epoch": 0.64, "learning_rate": 1.7503005873821183e-06, "logits/chosen": -2.320734739303589, "logits/rejected": -2.263913631439209, "logps/chosen": -158.90725708007812, "logps/rejected": -193.49929809570312, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": 0.015210744924843311, "rewards/margins": 0.08328135311603546, "rewards/rejected": -0.06807061284780502, "step": 9740 }, { "epoch": 0.64, "learning_rate": 1.744855424378148e-06, "logits/chosen": -2.1784415245056152, "logits/rejected": -2.192660093307495, "logps/chosen": -186.56448364257812, "logps/rejected": -220.04287719726562, "loss": 0.6906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02062476985156536, "rewards/margins": 0.10032866150140762, "rewards/rejected": -0.07970388978719711, "step": 9750 }, { "epoch": 0.64, "learning_rate": 1.7394142005781973e-06, "logits/chosen": -2.121195077896118, "logits/rejected": -2.2074551582336426, "logps/chosen": -260.9930114746094, "logps/rejected": -271.81256103515625, "loss": 0.6932, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.007180415093898773, "rewards/margins": 0.07248598337173462, "rewards/rejected": -0.06530557572841644, "step": 9760 }, { "epoch": 0.64, "learning_rate": 1.7339769443663528e-06, "logits/chosen": -2.3068153858184814, "logits/rejected": -2.1621522903442383, "logps/chosen": -142.29673767089844, "logps/rejected": -153.0177764892578, "loss": 0.6874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.006454641930758953, "rewards/margins": 0.10017760843038559, "rewards/rejected": -0.09372296929359436, "step": 9770 }, { "epoch": 0.64, "learning_rate": 1.7285436841060078e-06, "logits/chosen": -2.4975733757019043, "logits/rejected": -2.209691047668457, "logps/chosen": -274.84765625, "logps/rejected": -240.65109252929688, "loss": 0.6894, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.012807024642825127, "rewards/margins": 0.08757642656564713, "rewards/rejected": -0.07476940006017685, "step": 9780 }, { "epoch": 0.64, "learning_rate": 1.7231144481397083e-06, "logits/chosen": -2.3894002437591553, "logits/rejected": -2.312439441680908, "logps/chosen": -221.06201171875, "logps/rejected": -201.44149780273438, "loss": 0.6904, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.006636572536081076, "rewards/margins": 0.058181844651699066, "rewards/rejected": -0.06481841951608658, "step": 9790 }, { "epoch": 0.64, "learning_rate": 1.7176892647890092e-06, "logits/chosen": -2.457059383392334, "logits/rejected": -2.2145180702209473, "logps/chosen": -236.02871704101562, "logps/rejected": -202.4846649169922, "loss": 0.6915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0036267780233174562, "rewards/margins": 0.05337395519018173, "rewards/rejected": -0.05700073391199112, "step": 9800 }, { "epoch": 0.64, "eval_logits/chosen": -2.3302435874938965, "eval_logits/rejected": -2.1416378021240234, "eval_logps/chosen": -232.28138732910156, "eval_logps/rejected": -219.88731384277344, "eval_loss": 0.6898448467254639, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -0.002764492528513074, "eval_rewards/margins": 0.07998983561992645, "eval_rewards/rejected": -0.08275433629751205, "eval_runtime": 711.5893, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 9800 }, { "epoch": 0.64, "learning_rate": 1.7122681623543239e-06, "logits/chosen": -2.4701716899871826, "logits/rejected": -2.205655574798584, "logps/chosen": -245.60400390625, "logps/rejected": -244.6284637451172, "loss": 0.6906, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.013846084475517273, "rewards/margins": 0.10238330066204071, "rewards/rejected": -0.08853721618652344, "step": 9810 }, { "epoch": 0.64, "learning_rate": 1.7068511691147788e-06, "logits/chosen": -2.2074344158172607, "logits/rejected": -2.2472825050354004, "logps/chosen": -191.83944702148438, "logps/rejected": -208.13674926757812, "loss": 0.6909, "rewards/accuracies": 0.625, "rewards/chosen": 0.02291598729789257, "rewards/margins": 0.07927681505680084, "rewards/rejected": -0.05636082962155342, "step": 9820 }, { "epoch": 0.64, "learning_rate": 1.7014383133280636e-06, "logits/chosen": -2.4390132427215576, "logits/rejected": -2.075756311416626, "logps/chosen": -250.9913330078125, "logps/rejected": -205.5496826171875, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02356928028166294, "rewards/margins": 0.07952290773391724, "rewards/rejected": -0.10309220850467682, "step": 9830 }, { "epoch": 0.64, "learning_rate": 1.696029623230286e-06, "logits/chosen": -2.4049618244171143, "logits/rejected": -2.321725845336914, "logps/chosen": -255.1918487548828, "logps/rejected": -282.19464111328125, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.007304821163415909, "rewards/margins": 0.08223724365234375, "rewards/rejected": -0.07493243366479874, "step": 9840 }, { "epoch": 0.64, "learning_rate": 1.6906251270358229e-06, "logits/chosen": -2.3903801441192627, "logits/rejected": -2.23978853225708, "logps/chosen": -264.2168273925781, "logps/rejected": -220.81857299804688, "loss": 0.6908, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.013624888844788074, "rewards/margins": 0.06343577802181244, "rewards/rejected": -0.07706067711114883, "step": 9850 }, { "epoch": 0.65, "learning_rate": 1.685224852937174e-06, "logits/chosen": -2.1878368854522705, "logits/rejected": -2.0732967853546143, "logps/chosen": -197.29331970214844, "logps/rejected": -254.4430694580078, "loss": 0.683, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.010714609175920486, "rewards/margins": 0.15290780365467072, "rewards/rejected": -0.14219316840171814, "step": 9860 }, { "epoch": 0.65, "learning_rate": 1.6798288291048136e-06, "logits/chosen": -2.151094436645508, "logits/rejected": -2.0521655082702637, "logps/chosen": -224.37606811523438, "logps/rejected": -207.9828643798828, "loss": 0.6871, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.011344604194164276, "rewards/margins": 0.12311786413192749, "rewards/rejected": -0.13446247577667236, "step": 9870 }, { "epoch": 0.65, "learning_rate": 1.6744370836870466e-06, "logits/chosen": -2.555605888366699, "logits/rejected": -2.2639527320861816, "logps/chosen": -333.4253845214844, "logps/rejected": -261.70599365234375, "loss": 0.687, "rewards/accuracies": 0.625, "rewards/chosen": 0.0126813855022192, "rewards/margins": 0.1179838627576828, "rewards/rejected": -0.10530247539281845, "step": 9880 }, { "epoch": 0.65, "learning_rate": 1.6690496448098576e-06, "logits/chosen": -2.2291011810302734, "logits/rejected": -1.9320964813232422, "logps/chosen": -229.88858032226562, "logps/rejected": -217.7265167236328, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": -0.008523445576429367, "rewards/margins": 0.0869704857468605, "rewards/rejected": -0.09549392759799957, "step": 9890 }, { "epoch": 0.65, "learning_rate": 1.6636665405767666e-06, "logits/chosen": -2.3363430500030518, "logits/rejected": -2.170015573501587, "logps/chosen": -235.27737426757812, "logps/rejected": -223.20523071289062, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": 0.02493545040488243, "rewards/margins": 0.06817851960659027, "rewards/rejected": -0.043243080377578735, "step": 9900 }, { "epoch": 0.65, "eval_logits/chosen": -2.327425956726074, "eval_logits/rejected": -2.1391117572784424, "eval_logps/chosen": -232.06900024414062, "eval_logps/rejected": -220.34877014160156, "eval_loss": 0.6898226141929626, "eval_rewards/accuracies": 0.6434999704360962, "eval_rewards/chosen": -0.0006405520252883434, "eval_rewards/margins": 0.08672798424959183, "eval_rewards/rejected": -0.08736853301525116, "eval_runtime": 713.8035, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 9900 }, { "epoch": 0.65, "learning_rate": 1.6582877990686827e-06, "logits/chosen": -2.330827474594116, "logits/rejected": -2.275949716567993, "logps/chosen": -120.868408203125, "logps/rejected": -154.09080505371094, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.004259592853486538, "rewards/margins": 0.10891057550907135, "rewards/rejected": -0.10465097427368164, "step": 9910 }, { "epoch": 0.65, "learning_rate": 1.6529134483437562e-06, "logits/chosen": -2.333859920501709, "logits/rejected": -2.153388738632202, "logps/chosen": -206.96316528320312, "logps/rejected": -178.53024291992188, "loss": 0.6895, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.013889017514884472, "rewards/margins": 0.11020763963460922, "rewards/rejected": -0.12409665435552597, "step": 9920 }, { "epoch": 0.65, "learning_rate": 1.647543516437233e-06, "logits/chosen": -2.296292781829834, "logits/rejected": -2.237565517425537, "logps/chosen": -202.46505737304688, "logps/rejected": -229.85952758789062, "loss": 0.6896, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.034268446266651154, "rewards/margins": 0.06709616631269455, "rewards/rejected": -0.1013646125793457, "step": 9930 }, { "epoch": 0.65, "learning_rate": 1.6421780313613088e-06, "logits/chosen": -2.445279121398926, "logits/rejected": -2.040759563446045, "logps/chosen": -205.244384765625, "logps/rejected": -181.84715270996094, "loss": 0.691, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.005027498118579388, "rewards/margins": 0.09792643785476685, "rewards/rejected": -0.10295393317937851, "step": 9940 }, { "epoch": 0.65, "learning_rate": 1.6368170211049816e-06, "logits/chosen": -2.3535354137420654, "logits/rejected": -1.8584728240966797, "logps/chosen": -284.0115661621094, "logps/rejected": -234.0505828857422, "loss": 0.6883, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0021137488074600697, "rewards/margins": 0.10150531679391861, "rewards/rejected": -0.1036190614104271, "step": 9950 }, { "epoch": 0.65, "learning_rate": 1.6314605136339074e-06, "logits/chosen": -2.3524794578552246, "logits/rejected": -2.1899707317352295, "logps/chosen": -198.07577514648438, "logps/rejected": -187.40274047851562, "loss": 0.691, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.024890681728720665, "rewards/margins": 0.07226412743330002, "rewards/rejected": -0.09715481102466583, "step": 9960 }, { "epoch": 0.65, "learning_rate": 1.6261085368902526e-06, "logits/chosen": -2.551018238067627, "logits/rejected": -2.202836751937866, "logps/chosen": -265.8411865234375, "logps/rejected": -229.21688842773438, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.009003100916743279, "rewards/margins": 0.07005171477794647, "rewards/rejected": -0.0790548101067543, "step": 9970 }, { "epoch": 0.65, "learning_rate": 1.6207611187925503e-06, "logits/chosen": -2.231933832168579, "logits/rejected": -2.2483532428741455, "logps/chosen": -209.96499633789062, "logps/rejected": -265.6410827636719, "loss": 0.6867, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.011381099000573158, "rewards/margins": 0.09358282387256622, "rewards/rejected": -0.10496392101049423, "step": 9980 }, { "epoch": 0.65, "learning_rate": 1.6154182872355512e-06, "logits/chosen": -2.2878527641296387, "logits/rejected": -2.338225841522217, "logps/chosen": -167.45220947265625, "logps/rejected": -189.28424072265625, "loss": 0.6919, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03634749725461006, "rewards/margins": 0.07412412762641907, "rewards/rejected": -0.11047162860631943, "step": 9990 }, { "epoch": 0.65, "learning_rate": 1.610080070090084e-06, "logits/chosen": -2.275801181793213, "logits/rejected": -2.2091450691223145, "logps/chosen": -186.84176635742188, "logps/rejected": -186.8584442138672, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.059204794466495514, "rewards/margins": 0.09045000374317169, "rewards/rejected": -0.149654820561409, "step": 10000 }, { "epoch": 0.65, "eval_logits/chosen": -2.322659730911255, "eval_logits/rejected": -2.1345160007476807, "eval_logps/chosen": -233.91151428222656, "eval_logps/rejected": -222.27159118652344, "eval_loss": 0.6898568272590637, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.01906559430062771, "eval_rewards/margins": 0.08753134310245514, "eval_rewards/rejected": -0.1065969467163086, "eval_runtime": 713.7271, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 10000 }, { "epoch": 0.65, "learning_rate": 1.6047464952029034e-06, "logits/chosen": -2.4548022747039795, "logits/rejected": -2.32185697555542, "logps/chosen": -254.0906219482422, "logps/rejected": -272.3085021972656, "loss": 0.6896, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.001967624295502901, "rewards/margins": 0.09991808235645294, "rewards/rejected": -0.09795045852661133, "step": 10010 }, { "epoch": 0.66, "learning_rate": 1.5994175903965486e-06, "logits/chosen": -2.183396577835083, "logits/rejected": -2.0627362728118896, "logps/chosen": -254.3908233642578, "logps/rejected": -266.36431884765625, "loss": 0.6903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04060421884059906, "rewards/margins": 0.10219021886587143, "rewards/rejected": -0.1427944153547287, "step": 10020 }, { "epoch": 0.66, "learning_rate": 1.5940933834691977e-06, "logits/chosen": -2.6259872913360596, "logits/rejected": -1.9777504205703735, "logps/chosen": -307.5574951171875, "logps/rejected": -208.10971069335938, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.022915838286280632, "rewards/margins": 0.08449498564004898, "rewards/rejected": -0.10741082578897476, "step": 10030 }, { "epoch": 0.66, "learning_rate": 1.588773902194522e-06, "logits/chosen": -2.1228013038635254, "logits/rejected": -1.8389813899993896, "logps/chosen": -208.42453002929688, "logps/rejected": -221.58456420898438, "loss": 0.693, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.04344822093844414, "rewards/margins": 0.1740039587020874, "rewards/rejected": -0.21745216846466064, "step": 10040 }, { "epoch": 0.66, "learning_rate": 1.583459174321541e-06, "logits/chosen": -2.1011738777160645, "logits/rejected": -1.9465796947479248, "logps/chosen": -213.76220703125, "logps/rejected": -196.18130493164062, "loss": 0.6891, "rewards/accuracies": 0.625, "rewards/chosen": -0.04144889861345291, "rewards/margins": 0.09513147920370102, "rewards/rejected": -0.13658036291599274, "step": 10050 }, { "epoch": 0.66, "learning_rate": 1.5781492275744797e-06, "logits/chosen": -2.5177080631256104, "logits/rejected": -2.132356882095337, "logps/chosen": -295.3465881347656, "logps/rejected": -280.4678039550781, "loss": 0.6918, "rewards/accuracies": 0.875, "rewards/chosen": -0.011021384038031101, "rewards/margins": 0.11339070647954941, "rewards/rejected": -0.12441209703683853, "step": 10060 }, { "epoch": 0.66, "learning_rate": 1.5728440896526215e-06, "logits/chosen": -2.2357475757598877, "logits/rejected": -2.054103374481201, "logps/chosen": -276.3549499511719, "logps/rejected": -238.15597534179688, "loss": 0.6892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.02952994778752327, "rewards/margins": 0.08003853261470795, "rewards/rejected": -0.10956847667694092, "step": 10070 }, { "epoch": 0.66, "learning_rate": 1.5675437882301633e-06, "logits/chosen": -2.3144021034240723, "logits/rejected": -2.1165995597839355, "logps/chosen": -223.0723419189453, "logps/rejected": -191.2317352294922, "loss": 0.693, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0402815006673336, "rewards/margins": 0.02955157496035099, "rewards/rejected": -0.06983307749032974, "step": 10080 }, { "epoch": 0.66, "learning_rate": 1.5622483509560748e-06, "logits/chosen": -2.194026470184326, "logits/rejected": -2.2077908515930176, "logps/chosen": -173.01951599121094, "logps/rejected": -214.22494506835938, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.022903580218553543, "rewards/margins": 0.11023157835006714, "rewards/rejected": -0.13313516974449158, "step": 10090 }, { "epoch": 0.66, "learning_rate": 1.5569578054539506e-06, "logits/chosen": -2.266801357269287, "logits/rejected": -1.928422212600708, "logps/chosen": -279.8221130371094, "logps/rejected": -209.8824920654297, "loss": 0.6859, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0062502906657755375, "rewards/margins": 0.13923415541648865, "rewards/rejected": -0.14548444747924805, "step": 10100 }, { "epoch": 0.66, "eval_logits/chosen": -2.3166568279266357, "eval_logits/rejected": -2.12905216217041, "eval_logps/chosen": -234.25631713867188, "eval_logps/rejected": -222.29376220703125, "eval_loss": 0.689877450466156, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.02251364104449749, "eval_rewards/margins": 0.08430492877960205, "eval_rewards/rejected": -0.10681857168674469, "eval_runtime": 714.2219, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 10100 }, { "epoch": 0.66, "learning_rate": 1.551672179321867e-06, "logits/chosen": -2.239980459213257, "logits/rejected": -2.30826735496521, "logps/chosen": -217.06527709960938, "logps/rejected": -203.61270141601562, "loss": 0.6903, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.012083550915122032, "rewards/margins": 0.07718555629253387, "rewards/rejected": -0.08926911652088165, "step": 10110 }, { "epoch": 0.66, "learning_rate": 1.5463915001322398e-06, "logits/chosen": -2.280668258666992, "logits/rejected": -2.135650157928467, "logps/chosen": -249.37588500976562, "logps/rejected": -242.8144073486328, "loss": 0.6881, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03719538077712059, "rewards/margins": 0.07806181162595749, "rewards/rejected": -0.11525720357894897, "step": 10120 }, { "epoch": 0.66, "learning_rate": 1.5411157954316784e-06, "logits/chosen": -2.2775256633758545, "logits/rejected": -2.181715250015259, "logps/chosen": -201.49807739257812, "logps/rejected": -196.48684692382812, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": 0.003794357879087329, "rewards/margins": 0.08678573369979858, "rewards/rejected": -0.08299137651920319, "step": 10130 }, { "epoch": 0.66, "learning_rate": 1.535845092740843e-06, "logits/chosen": -2.4420783519744873, "logits/rejected": -2.282604217529297, "logps/chosen": -237.28085327148438, "logps/rejected": -254.278076171875, "loss": 0.6909, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.00040371305658482015, "rewards/margins": 0.04601416364312172, "rewards/rejected": -0.0456104576587677, "step": 10140 }, { "epoch": 0.66, "learning_rate": 1.5305794195543005e-06, "logits/chosen": -2.296903133392334, "logits/rejected": -2.3559365272521973, "logps/chosen": -206.9901123046875, "logps/rejected": -198.97463989257812, "loss": 0.6891, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0018900141585618258, "rewards/margins": 0.08915947377681732, "rewards/rejected": -0.08726945519447327, "step": 10150 }, { "epoch": 0.66, "learning_rate": 1.5253188033403816e-06, "logits/chosen": -2.321024179458618, "logits/rejected": -2.416475772857666, "logps/chosen": -171.10528564453125, "logps/rejected": -191.79251098632812, "loss": 0.691, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0037040214519947767, "rewards/margins": 0.0402885265648365, "rewards/rejected": -0.0439925454556942, "step": 10160 }, { "epoch": 0.67, "learning_rate": 1.520063271541037e-06, "logits/chosen": -2.296424150466919, "logits/rejected": -2.201472282409668, "logps/chosen": -178.9801483154297, "logps/rejected": -180.84454345703125, "loss": 0.6852, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.00475387554615736, "rewards/margins": 0.1316739171743393, "rewards/rejected": -0.13642781972885132, "step": 10170 }, { "epoch": 0.67, "learning_rate": 1.5148128515716954e-06, "logits/chosen": -2.529043674468994, "logits/rejected": -1.8911247253417969, "logps/chosen": -264.5617370605469, "logps/rejected": -202.96563720703125, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.01967450976371765, "rewards/margins": 0.10778073221445084, "rewards/rejected": -0.08810621500015259, "step": 10180 }, { "epoch": 0.67, "learning_rate": 1.5095675708211197e-06, "logits/chosen": -2.348248243331909, "logits/rejected": -2.340000629425049, "logps/chosen": -194.94430541992188, "logps/rejected": -222.9235382080078, "loss": 0.6904, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.03783398121595383, "rewards/margins": 0.046356312930583954, "rewards/rejected": -0.08419029414653778, "step": 10190 }, { "epoch": 0.67, "learning_rate": 1.504327456651263e-06, "logits/chosen": -2.28633189201355, "logits/rejected": -2.213207244873047, "logps/chosen": -262.3058166503906, "logps/rejected": -244.20077514648438, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.009028220549225807, "rewards/margins": 0.08859656751155853, "rewards/rejected": -0.09762479364871979, "step": 10200 }, { "epoch": 0.67, "eval_logits/chosen": -2.3150970935821533, "eval_logits/rejected": -2.127429723739624, "eval_logps/chosen": -231.98062133789062, "eval_logps/rejected": -220.61843872070312, "eval_loss": 0.6898481249809265, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": 0.00024335407942999154, "eval_rewards/margins": 0.09030859917402267, "eval_rewards/rejected": -0.09006524831056595, "eval_runtime": 712.515, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 10200 }, { "epoch": 0.67, "learning_rate": 1.4990925363971284e-06, "logits/chosen": -2.370110034942627, "logits/rejected": -1.957165002822876, "logps/chosen": -289.9840087890625, "logps/rejected": -239.0626983642578, "loss": 0.6878, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0006083324551582336, "rewards/margins": 0.17567750811576843, "rewards/rejected": -0.17628583312034607, "step": 10210 }, { "epoch": 0.67, "learning_rate": 1.4938628373666236e-06, "logits/chosen": -2.2375118732452393, "logits/rejected": -2.292834997177124, "logps/chosen": -172.5015106201172, "logps/rejected": -181.63931274414062, "loss": 0.6917, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0007777426508255303, "rewards/margins": 0.07265160232782364, "rewards/rejected": -0.07187385857105255, "step": 10220 }, { "epoch": 0.67, "learning_rate": 1.4886383868404203e-06, "logits/chosen": -2.143146514892578, "logits/rejected": -2.0093531608581543, "logps/chosen": -163.1758575439453, "logps/rejected": -161.16677856445312, "loss": 0.6891, "rewards/accuracies": 0.625, "rewards/chosen": -0.019540909677743912, "rewards/margins": 0.08020851016044617, "rewards/rejected": -0.09974941611289978, "step": 10230 }, { "epoch": 0.67, "learning_rate": 1.483419212071813e-06, "logits/chosen": -2.097904920578003, "logits/rejected": -1.9259834289550781, "logps/chosen": -188.40487670898438, "logps/rejected": -189.32362365722656, "loss": 0.6895, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.000592652999330312, "rewards/margins": 0.0810011625289917, "rewards/rejected": -0.08040851354598999, "step": 10240 }, { "epoch": 0.67, "learning_rate": 1.478205340286573e-06, "logits/chosen": -2.2346534729003906, "logits/rejected": -2.2042670249938965, "logps/chosen": -209.13229370117188, "logps/rejected": -199.13385009765625, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.05523378401994705, "rewards/margins": 0.08006395399570465, "rewards/rejected": -0.1352977454662323, "step": 10250 }, { "epoch": 0.67, "learning_rate": 1.4729967986828104e-06, "logits/chosen": -2.3945605754852295, "logits/rejected": -2.156639575958252, "logps/chosen": -321.37115478515625, "logps/rejected": -282.4629211425781, "loss": 0.69, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.006125994957983494, "rewards/margins": 0.0905977264046669, "rewards/rejected": -0.08447173237800598, "step": 10260 }, { "epoch": 0.67, "learning_rate": 1.4677936144308286e-06, "logits/chosen": -2.3705971240997314, "logits/rejected": -2.0719449520111084, "logps/chosen": -222.4903564453125, "logps/rejected": -207.2783966064453, "loss": 0.6883, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.017543844878673553, "rewards/margins": 0.12851087749004364, "rewards/rejected": -0.11096702516078949, "step": 10270 }, { "epoch": 0.67, "learning_rate": 1.4625958146729864e-06, "logits/chosen": -2.386382818222046, "logits/rejected": -2.2117514610290527, "logps/chosen": -220.2921142578125, "logps/rejected": -212.52377319335938, "loss": 0.6898, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.00028302668943069875, "rewards/margins": 0.08179637044668198, "rewards/rejected": -0.08207939565181732, "step": 10280 }, { "epoch": 0.67, "learning_rate": 1.4574034265235523e-06, "logits/chosen": -2.474156141281128, "logits/rejected": -1.9117343425750732, "logps/chosen": -254.64987182617188, "logps/rejected": -168.7136688232422, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.011786472983658314, "rewards/margins": 0.11826670169830322, "rewards/rejected": -0.10648022592067719, "step": 10290 }, { "epoch": 0.67, "learning_rate": 1.452216477068568e-06, "logits/chosen": -2.342738389968872, "logits/rejected": -1.8315894603729248, "logps/chosen": -229.30612182617188, "logps/rejected": -147.89767456054688, "loss": 0.6876, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.007388654164969921, "rewards/margins": 0.11506316810846329, "rewards/rejected": -0.10767451673746109, "step": 10300 }, { "epoch": 0.67, "eval_logits/chosen": -2.3180654048919678, "eval_logits/rejected": -2.130103826522827, "eval_logps/chosen": -231.863525390625, "eval_logps/rejected": -219.8981475830078, "eval_loss": 0.6898233294487, "eval_rewards/accuracies": 0.6434999704360962, "eval_rewards/chosen": 0.0014140387065708637, "eval_rewards/margins": 0.08427631109952927, "eval_rewards/rejected": -0.08286228775978088, "eval_runtime": 713.5886, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 10300 }, { "epoch": 0.67, "learning_rate": 1.4470349933657004e-06, "logits/chosen": -2.5185744762420654, "logits/rejected": -2.2852203845977783, "logps/chosen": -220.93069458007812, "logps/rejected": -204.84841918945312, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.002010664436966181, "rewards/margins": 0.07896588742733002, "rewards/rejected": -0.08097656071186066, "step": 10310 }, { "epoch": 0.68, "learning_rate": 1.4418590024441096e-06, "logits/chosen": -2.41025972366333, "logits/rejected": -2.01489520072937, "logps/chosen": -250.54312133789062, "logps/rejected": -192.83309936523438, "loss": 0.6889, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.029171252623200417, "rewards/margins": 0.08775301277637482, "rewards/rejected": -0.05858175829052925, "step": 10320 }, { "epoch": 0.68, "learning_rate": 1.436688531304297e-06, "logits/chosen": -2.404268264770508, "logits/rejected": -2.0696969032287598, "logps/chosen": -216.67111206054688, "logps/rejected": -219.65896606445312, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01656418666243553, "rewards/margins": 0.08477049320936203, "rewards/rejected": -0.0682063102722168, "step": 10330 }, { "epoch": 0.68, "learning_rate": 1.431523606917974e-06, "logits/chosen": -2.224674940109253, "logits/rejected": -2.2193877696990967, "logps/chosen": -207.0742950439453, "logps/rejected": -223.9700164794922, "loss": 0.6865, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.019749853760004044, "rewards/margins": 0.10281027853488922, "rewards/rejected": -0.12256012111902237, "step": 10340 }, { "epoch": 0.68, "learning_rate": 1.4263642562279162e-06, "logits/chosen": -2.0139873027801514, "logits/rejected": -1.9893707036972046, "logps/chosen": -249.74459838867188, "logps/rejected": -266.02679443359375, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.002572817262262106, "rewards/margins": 0.07792092114686966, "rewards/rejected": -0.0804937332868576, "step": 10350 }, { "epoch": 0.68, "learning_rate": 1.4212105061478257e-06, "logits/chosen": -2.0695881843566895, "logits/rejected": -2.087667465209961, "logps/chosen": -229.23046875, "logps/rejected": -247.36196899414062, "loss": 0.6886, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.006600628606975079, "rewards/margins": 0.10308027267456055, "rewards/rejected": -0.09647965431213379, "step": 10360 }, { "epoch": 0.68, "learning_rate": 1.4160623835621848e-06, "logits/chosen": -2.4251503944396973, "logits/rejected": -2.291647434234619, "logps/chosen": -235.0546875, "logps/rejected": -231.79080200195312, "loss": 0.6901, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.022564858198165894, "rewards/margins": 0.08214254677295685, "rewards/rejected": -0.05957768112421036, "step": 10370 }, { "epoch": 0.68, "learning_rate": 1.4109199153261249e-06, "logits/chosen": -2.2098567485809326, "logits/rejected": -2.1036949157714844, "logps/chosen": -273.9472961425781, "logps/rejected": -253.73007202148438, "loss": 0.6891, "rewards/accuracies": 0.625, "rewards/chosen": 0.02047353982925415, "rewards/margins": 0.1009642630815506, "rewards/rejected": -0.08049070835113525, "step": 10380 }, { "epoch": 0.68, "learning_rate": 1.405783128265278e-06, "logits/chosen": -2.2883942127227783, "logits/rejected": -2.265260934829712, "logps/chosen": -204.190185546875, "logps/rejected": -207.59140014648438, "loss": 0.6898, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0005667827790603042, "rewards/margins": 0.06667406111955643, "rewards/rejected": -0.06724084168672562, "step": 10390 }, { "epoch": 0.68, "learning_rate": 1.4006520491756427e-06, "logits/chosen": -2.389329433441162, "logits/rejected": -2.1550679206848145, "logps/chosen": -194.2144012451172, "logps/rejected": -140.29342651367188, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.020098352804780006, "rewards/margins": 0.10719966888427734, "rewards/rejected": -0.08710131794214249, "step": 10400 }, { "epoch": 0.68, "eval_logits/chosen": -2.3169994354248047, "eval_logits/rejected": -2.129032611846924, "eval_logps/chosen": -230.2224578857422, "eval_logps/rejected": -218.50982666015625, "eval_loss": 0.6898374557495117, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": 0.017825065180659294, "eval_rewards/margins": 0.08680439740419388, "eval_rewards/rejected": -0.06897933036088943, "eval_runtime": 714.2722, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 10400 }, { "epoch": 0.68, "learning_rate": 1.39552670482344e-06, "logits/chosen": -2.207252264022827, "logits/rejected": -2.280210018157959, "logps/chosen": -173.74078369140625, "logps/rejected": -181.15975952148438, "loss": 0.6907, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.00858780462294817, "rewards/margins": 0.07527503371238708, "rewards/rejected": -0.06668722629547119, "step": 10410 }, { "epoch": 0.68, "learning_rate": 1.3904071219449776e-06, "logits/chosen": -2.2979884147644043, "logits/rejected": -1.8623745441436768, "logps/chosen": -196.11106872558594, "logps/rejected": -119.69913482666016, "loss": 0.6895, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.037905577570199966, "rewards/margins": 0.08248183131217957, "rewards/rejected": -0.0445762574672699, "step": 10420 }, { "epoch": 0.68, "learning_rate": 1.3852933272465068e-06, "logits/chosen": -2.458289384841919, "logits/rejected": -2.22627592086792, "logps/chosen": -234.090087890625, "logps/rejected": -206.09585571289062, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03770025074481964, "rewards/margins": 0.07229628413915634, "rewards/rejected": -0.0345960296690464, "step": 10430 }, { "epoch": 0.68, "learning_rate": 1.3801853474040873e-06, "logits/chosen": -2.254831552505493, "logits/rejected": -2.187377452850342, "logps/chosen": -236.34634399414062, "logps/rejected": -230.05392456054688, "loss": 0.6891, "rewards/accuracies": 0.75, "rewards/chosen": 0.021748732775449753, "rewards/margins": 0.10167870670557022, "rewards/rejected": -0.07992996275424957, "step": 10440 }, { "epoch": 0.68, "learning_rate": 1.3750832090634417e-06, "logits/chosen": -2.4042470455169678, "logits/rejected": -2.09578537940979, "logps/chosen": -181.5123291015625, "logps/rejected": -175.58468627929688, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04660337418317795, "rewards/margins": 0.08723708242177963, "rewards/rejected": -0.040633708238601685, "step": 10450 }, { "epoch": 0.68, "learning_rate": 1.3699869388398245e-06, "logits/chosen": -2.2707431316375732, "logits/rejected": -2.1249325275421143, "logps/chosen": -209.69296264648438, "logps/rejected": -199.55191040039062, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03328350931406021, "rewards/margins": 0.08312380313873291, "rewards/rejected": -0.049840297549963, "step": 10460 }, { "epoch": 0.69, "learning_rate": 1.3648965633178772e-06, "logits/chosen": -2.2864489555358887, "logits/rejected": -2.1960525512695312, "logps/chosen": -197.67889404296875, "logps/rejected": -219.58993530273438, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": 0.030532773584127426, "rewards/margins": 0.10499455034732819, "rewards/rejected": -0.07446177303791046, "step": 10470 }, { "epoch": 0.69, "learning_rate": 1.3598121090514938e-06, "logits/chosen": -2.275895357131958, "logits/rejected": -2.1503143310546875, "logps/chosen": -180.6181640625, "logps/rejected": -165.47055053710938, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02389690652489662, "rewards/margins": 0.08975638449192047, "rewards/rejected": -0.06585947424173355, "step": 10480 }, { "epoch": 0.69, "learning_rate": 1.3547336025636753e-06, "logits/chosen": -2.2028050422668457, "logits/rejected": -1.984891653060913, "logps/chosen": -273.01007080078125, "logps/rejected": -239.7392120361328, "loss": 0.6903, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.022864457219839096, "rewards/margins": 0.08258132636547089, "rewards/rejected": -0.059716880321502686, "step": 10490 }, { "epoch": 0.69, "learning_rate": 1.3496610703464022e-06, "logits/chosen": -2.351762533187866, "logits/rejected": -2.1159586906433105, "logps/chosen": -227.1230926513672, "logps/rejected": -190.7210693359375, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.020914912223815918, "rewards/margins": 0.09119327366352081, "rewards/rejected": -0.0702783390879631, "step": 10500 }, { "epoch": 0.69, "eval_logits/chosen": -2.320483684539795, "eval_logits/rejected": -2.132249593734741, "eval_logps/chosen": -229.9177703857422, "eval_logps/rejected": -217.9020538330078, "eval_loss": 0.6898301839828491, "eval_rewards/accuracies": 0.6395000219345093, "eval_rewards/chosen": 0.0208718404173851, "eval_rewards/margins": 0.08377327024936676, "eval_rewards/rejected": -0.06290142238140106, "eval_runtime": 713.7053, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 10500 }, { "epoch": 0.69, "learning_rate": 1.3445945388604848e-06, "logits/chosen": -2.213752508163452, "logits/rejected": -1.9681813716888428, "logps/chosen": -239.2418670654297, "logps/rejected": -207.7646942138672, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.010183857753872871, "rewards/margins": 0.1168348640203476, "rewards/rejected": -0.12701871991157532, "step": 10510 }, { "epoch": 0.69, "learning_rate": 1.3395340345354358e-06, "logits/chosen": -2.238507032394409, "logits/rejected": -2.3673980236053467, "logps/chosen": -223.3243408203125, "logps/rejected": -249.09719848632812, "loss": 0.6879, "rewards/accuracies": 0.625, "rewards/chosen": 0.003734863130375743, "rewards/margins": 0.08525262027978897, "rewards/rejected": -0.08151774853467941, "step": 10520 }, { "epoch": 0.69, "learning_rate": 1.334479583769322e-06, "logits/chosen": -2.470689296722412, "logits/rejected": -2.1623778343200684, "logps/chosen": -252.4402313232422, "logps/rejected": -207.7847137451172, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.023985490202903748, "rewards/margins": 0.05314163491129875, "rewards/rejected": -0.029156142845749855, "step": 10530 }, { "epoch": 0.69, "learning_rate": 1.3294312129286366e-06, "logits/chosen": -2.312894344329834, "logits/rejected": -2.193312168121338, "logps/chosen": -263.58502197265625, "logps/rejected": -255.36056518554688, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": 0.031435929238796234, "rewards/margins": 0.05446425825357437, "rewards/rejected": -0.023028332740068436, "step": 10540 }, { "epoch": 0.69, "learning_rate": 1.324388948348153e-06, "logits/chosen": -2.469510078430176, "logits/rejected": -2.076355218887329, "logps/chosen": -283.5762023925781, "logps/rejected": -210.35165405273438, "loss": 0.6885, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.016617698594927788, "rewards/margins": 0.0814189538359642, "rewards/rejected": -0.06480126827955246, "step": 10550 }, { "epoch": 0.69, "learning_rate": 1.319352816330796e-06, "logits/chosen": -2.5570406913757324, "logits/rejected": -2.0373148918151855, "logps/chosen": -281.7043151855469, "logps/rejected": -184.83396911621094, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.014928947202861309, "rewards/margins": 0.09585042297840118, "rewards/rejected": -0.0809214860200882, "step": 10560 }, { "epoch": 0.69, "learning_rate": 1.314322843147494e-06, "logits/chosen": -2.155149221420288, "logits/rejected": -2.2533445358276367, "logps/chosen": -172.06494140625, "logps/rejected": -234.2112579345703, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.017391042783856392, "rewards/margins": 0.07278571277856827, "rewards/rejected": -0.09017674624919891, "step": 10570 }, { "epoch": 0.69, "learning_rate": 1.3092990550370526e-06, "logits/chosen": -2.4301834106445312, "logits/rejected": -2.0875182151794434, "logps/chosen": -345.9046325683594, "logps/rejected": -267.7071228027344, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0014622181188315153, "rewards/margins": 0.07153777033090591, "rewards/rejected": -0.0729999840259552, "step": 10580 }, { "epoch": 0.69, "learning_rate": 1.3042814782060131e-06, "logits/chosen": -2.3605728149414062, "logits/rejected": -2.063471794128418, "logps/chosen": -177.5745849609375, "logps/rejected": -164.29254150390625, "loss": 0.6882, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.03928259015083313, "rewards/margins": 0.10634209215641022, "rewards/rejected": -0.06705950945615768, "step": 10590 }, { "epoch": 0.69, "learning_rate": 1.2992701388285112e-06, "logits/chosen": -2.371594190597534, "logits/rejected": -2.116765260696411, "logps/chosen": -266.95880126953125, "logps/rejected": -229.24685668945312, "loss": 0.6893, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.02390308678150177, "rewards/margins": 0.06760050356388092, "rewards/rejected": -0.04369742050766945, "step": 10600 }, { "epoch": 0.69, "eval_logits/chosen": -2.317068099975586, "eval_logits/rejected": -2.1291825771331787, "eval_logps/chosen": -230.43104553222656, "eval_logps/rejected": -218.47349548339844, "eval_loss": 0.689825177192688, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": 0.015739070251584053, "eval_rewards/margins": 0.08435513079166412, "eval_rewards/rejected": -0.06861607730388641, "eval_runtime": 714.2332, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 10600 }, { "epoch": 0.69, "learning_rate": 1.29426506304615e-06, "logits/chosen": -2.2151741981506348, "logits/rejected": -2.148871898651123, "logps/chosen": -225.4136199951172, "logps/rejected": -209.6869354248047, "loss": 0.6934, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0269019715487957, "rewards/margins": 0.03751341626048088, "rewards/rejected": -0.06441538780927658, "step": 10610 }, { "epoch": 0.69, "learning_rate": 1.289266276967855e-06, "logits/chosen": -2.364128589630127, "logits/rejected": -2.249577760696411, "logps/chosen": -337.41046142578125, "logps/rejected": -254.139892578125, "loss": 0.6915, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.011713572777807713, "rewards/margins": 0.06887595355510712, "rewards/rejected": -0.05716238543391228, "step": 10620 }, { "epoch": 0.7, "learning_rate": 1.284273806669745e-06, "logits/chosen": -2.3198752403259277, "logits/rejected": -2.095829963684082, "logps/chosen": -241.2201690673828, "logps/rejected": -268.35296630859375, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": -0.018951773643493652, "rewards/margins": 0.09592192620038986, "rewards/rejected": -0.1148737221956253, "step": 10630 }, { "epoch": 0.7, "learning_rate": 1.2792876781949884e-06, "logits/chosen": -2.0602927207946777, "logits/rejected": -1.8088810443878174, "logps/chosen": -207.2493133544922, "logps/rejected": -195.7181396484375, "loss": 0.6884, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.011766849085688591, "rewards/margins": 0.07786474376916885, "rewards/rejected": -0.08963160216808319, "step": 10640 }, { "epoch": 0.7, "learning_rate": 1.274307917553676e-06, "logits/chosen": -2.3178839683532715, "logits/rejected": -2.2395923137664795, "logps/chosen": -196.51788330078125, "logps/rejected": -232.0148162841797, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.008591088466346264, "rewards/margins": 0.10897374153137207, "rewards/rejected": -0.11756483465433121, "step": 10650 }, { "epoch": 0.7, "learning_rate": 1.2693345507226767e-06, "logits/chosen": -2.1067943572998047, "logits/rejected": -2.191366672515869, "logps/chosen": -230.4332733154297, "logps/rejected": -244.55105590820312, "loss": 0.6875, "rewards/accuracies": 0.75, "rewards/chosen": -0.004430090077221394, "rewards/margins": 0.12004270404577255, "rewards/rejected": -0.12447279691696167, "step": 10660 }, { "epoch": 0.7, "learning_rate": 1.2643676036455099e-06, "logits/chosen": -2.3527565002441406, "logits/rejected": -2.2609760761260986, "logps/chosen": -288.02587890625, "logps/rejected": -241.6444854736328, "loss": 0.6923, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0017340302001684904, "rewards/margins": 0.043596215546131134, "rewards/rejected": -0.045330245047807693, "step": 10670 }, { "epoch": 0.7, "learning_rate": 1.259407102232203e-06, "logits/chosen": -2.452852725982666, "logits/rejected": -2.0722763538360596, "logps/chosen": -277.25531005859375, "logps/rejected": -212.7763671875, "loss": 0.6883, "rewards/accuracies": 0.75, "rewards/chosen": 0.0037903212942183018, "rewards/margins": 0.08465041220188141, "rewards/rejected": -0.08086008578538895, "step": 10680 }, { "epoch": 0.7, "learning_rate": 1.254453072359163e-06, "logits/chosen": -2.389622211456299, "logits/rejected": -2.1636927127838135, "logps/chosen": -232.2378387451172, "logps/rejected": -217.2246551513672, "loss": 0.6892, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.017822107300162315, "rewards/margins": 0.06853047758340836, "rewards/rejected": -0.05070837587118149, "step": 10690 }, { "epoch": 0.7, "learning_rate": 1.2495055398690337e-06, "logits/chosen": -2.4718260765075684, "logits/rejected": -2.228008508682251, "logps/chosen": -228.7391357421875, "logps/rejected": -225.41854858398438, "loss": 0.6907, "rewards/accuracies": 0.5, "rewards/chosen": 0.011152736842632294, "rewards/margins": 0.05962613224983215, "rewards/rejected": -0.04847339540719986, "step": 10700 }, { "epoch": 0.7, "eval_logits/chosen": -2.3170278072357178, "eval_logits/rejected": -2.1292710304260254, "eval_logps/chosen": -230.355224609375, "eval_logps/rejected": -218.42803955078125, "eval_loss": 0.6898084878921509, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": 0.016497209668159485, "eval_rewards/margins": 0.08465855568647385, "eval_rewards/rejected": -0.06816134601831436, "eval_runtime": 714.9419, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 10700 }, { "epoch": 0.7, "learning_rate": 1.2445645305705718e-06, "logits/chosen": -2.4910426139831543, "logits/rejected": -2.1560773849487305, "logps/chosen": -206.70126342773438, "logps/rejected": -192.34877014160156, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0038032480515539646, "rewards/margins": 0.06290373206138611, "rewards/rejected": -0.06670697778463364, "step": 10710 }, { "epoch": 0.7, "learning_rate": 1.2396300702384995e-06, "logits/chosen": -2.4755501747131348, "logits/rejected": -2.2301669120788574, "logps/chosen": -256.727294921875, "logps/rejected": -224.15756225585938, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.010807778686285019, "rewards/margins": 0.05095580965280533, "rewards/rejected": -0.04014802724123001, "step": 10720 }, { "epoch": 0.7, "learning_rate": 1.234702184613381e-06, "logits/chosen": -2.0976624488830566, "logits/rejected": -2.176741600036621, "logps/chosen": -212.48825073242188, "logps/rejected": -221.2858428955078, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.029940495267510414, "rewards/margins": 0.09133056551218033, "rewards/rejected": -0.06139007955789566, "step": 10730 }, { "epoch": 0.7, "learning_rate": 1.2297808994014793e-06, "logits/chosen": -2.411856174468994, "logits/rejected": -2.16345477104187, "logps/chosen": -282.15887451171875, "logps/rejected": -253.8297882080078, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03775983303785324, "rewards/margins": 0.06563454121351242, "rewards/rejected": -0.02787470817565918, "step": 10740 }, { "epoch": 0.7, "learning_rate": 1.2248662402746314e-06, "logits/chosen": -2.251694679260254, "logits/rejected": -2.121366500854492, "logps/chosen": -191.34400939941406, "logps/rejected": -191.785400390625, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.014273548498749733, "rewards/margins": 0.0655721127986908, "rewards/rejected": -0.07984566688537598, "step": 10750 }, { "epoch": 0.7, "learning_rate": 1.2199582328701045e-06, "logits/chosen": -2.414492607116699, "logits/rejected": -1.9229921102523804, "logps/chosen": -289.251708984375, "logps/rejected": -257.48602294921875, "loss": 0.688, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.013818124309182167, "rewards/margins": 0.08525200188159943, "rewards/rejected": -0.07143385708332062, "step": 10760 }, { "epoch": 0.7, "learning_rate": 1.2150569027904712e-06, "logits/chosen": -2.3025307655334473, "logits/rejected": -2.191779851913452, "logps/chosen": -243.48434448242188, "logps/rejected": -245.3304901123047, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.024991249665617943, "rewards/margins": 0.08149056136608124, "rewards/rejected": -0.056499313563108444, "step": 10770 }, { "epoch": 0.71, "learning_rate": 1.2101622756034688e-06, "logits/chosen": -2.2971510887145996, "logits/rejected": -2.281870126724243, "logps/chosen": -221.53494262695312, "logps/rejected": -199.281982421875, "loss": 0.6893, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.012190895155072212, "rewards/margins": 0.08112286031246185, "rewards/rejected": -0.0689319595694542, "step": 10780 }, { "epoch": 0.71, "learning_rate": 1.2052743768418715e-06, "logits/chosen": -2.353773832321167, "logits/rejected": -2.1345012187957764, "logps/chosen": -243.76358032226562, "logps/rejected": -218.4962158203125, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04045509174466133, "rewards/margins": 0.08739558607339859, "rewards/rejected": -0.046940483152866364, "step": 10790 }, { "epoch": 0.71, "learning_rate": 1.2003932320033523e-06, "logits/chosen": -2.463850498199463, "logits/rejected": -2.155269145965576, "logps/chosen": -223.14895629882812, "logps/rejected": -234.60220336914062, "loss": 0.6877, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.033537622541189194, "rewards/margins": 0.10821393877267838, "rewards/rejected": -0.07467631250619888, "step": 10800 }, { "epoch": 0.71, "eval_logits/chosen": -2.3170695304870605, "eval_logits/rejected": -2.129291534423828, "eval_logps/chosen": -229.36056518554688, "eval_logps/rejected": -217.1490020751953, "eval_loss": 0.6898130178451538, "eval_rewards/accuracies": 0.6434999704360962, "eval_rewards/chosen": 0.026443878188729286, "eval_rewards/margins": 0.08181502670049667, "eval_rewards/rejected": -0.05537115037441254, "eval_runtime": 714.9768, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.399, "step": 10800 }, { "epoch": 0.71, "learning_rate": 1.1955188665503553e-06, "logits/chosen": -2.1240930557250977, "logits/rejected": -2.1801092624664307, "logps/chosen": -205.2183074951172, "logps/rejected": -190.25765991210938, "loss": 0.6927, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0009462200105190277, "rewards/margins": 0.0582948699593544, "rewards/rejected": -0.05924109369516373, "step": 10810 }, { "epoch": 0.71, "learning_rate": 1.1906513059099566e-06, "logits/chosen": -2.3073532581329346, "logits/rejected": -1.9913088083267212, "logps/chosen": -227.1727294921875, "logps/rejected": -229.9739532470703, "loss": 0.6884, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.04316357523202896, "rewards/margins": 0.11541776359081268, "rewards/rejected": -0.07225419580936432, "step": 10820 }, { "epoch": 0.71, "learning_rate": 1.185790575473738e-06, "logits/chosen": -2.247575283050537, "logits/rejected": -2.0391554832458496, "logps/chosen": -227.498046875, "logps/rejected": -188.4443817138672, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01635267771780491, "rewards/margins": 0.08220528066158295, "rewards/rejected": -0.06585261225700378, "step": 10830 }, { "epoch": 0.71, "learning_rate": 1.1809367005976516e-06, "logits/chosen": -2.3067519664764404, "logits/rejected": -2.062891721725464, "logps/chosen": -280.78778076171875, "logps/rejected": -215.189697265625, "loss": 0.6912, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.017070407047867775, "rewards/margins": 0.05449339747428894, "rewards/rejected": -0.037422992289066315, "step": 10840 }, { "epoch": 0.71, "learning_rate": 1.1760897066018842e-06, "logits/chosen": -2.242995262145996, "logits/rejected": -2.1077020168304443, "logps/chosen": -219.93295288085938, "logps/rejected": -224.17984008789062, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.031081268563866615, "rewards/margins": 0.10243818908929825, "rewards/rejected": -0.07135690748691559, "step": 10850 }, { "epoch": 0.71, "learning_rate": 1.1712496187707327e-06, "logits/chosen": -2.275620698928833, "logits/rejected": -2.0290729999542236, "logps/chosen": -240.3934783935547, "logps/rejected": -239.35073852539062, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.000892448821105063, "rewards/margins": 0.13023024797439575, "rewards/rejected": -0.13112269341945648, "step": 10860 }, { "epoch": 0.71, "learning_rate": 1.1664164623524646e-06, "logits/chosen": -2.244417667388916, "logits/rejected": -2.084033727645874, "logps/chosen": -216.13034057617188, "logps/rejected": -195.35897827148438, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.022446701303124428, "rewards/margins": 0.0748591274023056, "rewards/rejected": -0.052412427961826324, "step": 10870 }, { "epoch": 0.71, "learning_rate": 1.1615902625591926e-06, "logits/chosen": -2.2117581367492676, "logits/rejected": -2.1228392124176025, "logps/chosen": -227.09072875976562, "logps/rejected": -223.296630859375, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -0.01869513839483261, "rewards/margins": 0.056195490062236786, "rewards/rejected": -0.0748906284570694, "step": 10880 }, { "epoch": 0.71, "learning_rate": 1.156771044566738e-06, "logits/chosen": -2.3004848957061768, "logits/rejected": -2.2247865200042725, "logps/chosen": -264.3690490722656, "logps/rejected": -228.86154174804688, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0024559935554862022, "rewards/margins": 0.0770992636680603, "rewards/rejected": -0.07464326918125153, "step": 10890 }, { "epoch": 0.71, "learning_rate": 1.1519588335145037e-06, "logits/chosen": -2.2524847984313965, "logits/rejected": -2.393233060836792, "logps/chosen": -205.72433471679688, "logps/rejected": -228.1226806640625, "loss": 0.6924, "rewards/accuracies": 0.625, "rewards/chosen": 0.018544599413871765, "rewards/margins": 0.036989279091358185, "rewards/rejected": -0.01844467595219612, "step": 10900 }, { "epoch": 0.71, "eval_logits/chosen": -2.3110759258270264, "eval_logits/rejected": -2.123832941055298, "eval_logps/chosen": -230.8058624267578, "eval_logps/rejected": -218.31472778320312, "eval_loss": 0.6898163557052612, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": 0.011990930885076523, "eval_rewards/margins": 0.07901943475008011, "eval_rewards/rejected": -0.06702849268913269, "eval_runtime": 712.284, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 10900 }, { "epoch": 0.71, "learning_rate": 1.1471536545053382e-06, "logits/chosen": -2.343756914138794, "logits/rejected": -2.3280563354492188, "logps/chosen": -206.11807250976562, "logps/rejected": -222.1238250732422, "loss": 0.6895, "rewards/accuracies": 0.625, "rewards/chosen": 0.02916853502392769, "rewards/margins": 0.0845358818769455, "rewards/rejected": -0.05536733940243721, "step": 10910 }, { "epoch": 0.71, "learning_rate": 1.1423555326054112e-06, "logits/chosen": -2.2487311363220215, "logits/rejected": -1.972808837890625, "logps/chosen": -279.33294677734375, "logps/rejected": -221.55191040039062, "loss": 0.6837, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.035016145557165146, "rewards/margins": 0.1565500795841217, "rewards/rejected": -0.12153393030166626, "step": 10920 }, { "epoch": 0.72, "learning_rate": 1.1375644928440743e-06, "logits/chosen": -2.3918063640594482, "logits/rejected": -1.9648542404174805, "logps/chosen": -234.3190155029297, "logps/rejected": -176.64840698242188, "loss": 0.6883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.023642176762223244, "rewards/margins": 0.09833236038684845, "rewards/rejected": -0.07469018548727036, "step": 10930 }, { "epoch": 0.72, "learning_rate": 1.1327805602137396e-06, "logits/chosen": -2.329948663711548, "logits/rejected": -2.149320602416992, "logps/chosen": -258.65191650390625, "logps/rejected": -214.11959838867188, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0009221710497513413, "rewards/margins": 0.08445750176906586, "rewards/rejected": -0.0835353285074234, "step": 10940 }, { "epoch": 0.72, "learning_rate": 1.1280037596697426e-06, "logits/chosen": -2.181227922439575, "logits/rejected": -2.1459288597106934, "logps/chosen": -220.15103149414062, "logps/rejected": -270.27410888671875, "loss": 0.6846, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.007314229849725962, "rewards/margins": 0.13196273148059845, "rewards/rejected": -0.12464849650859833, "step": 10950 }, { "epoch": 0.72, "learning_rate": 1.123234116130216e-06, "logits/chosen": -2.2392992973327637, "logits/rejected": -2.20440673828125, "logps/chosen": -185.53500366210938, "logps/rejected": -203.2857208251953, "loss": 0.6889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0035335198044776917, "rewards/margins": 0.11696387827396393, "rewards/rejected": -0.11343035846948624, "step": 10960 }, { "epoch": 0.72, "learning_rate": 1.1184716544759553e-06, "logits/chosen": -2.0971415042877197, "logits/rejected": -2.0736968517303467, "logps/chosen": -163.72232055664062, "logps/rejected": -181.16928100585938, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.01641870103776455, "rewards/margins": 0.05032380297780037, "rewards/rejected": -0.03390509635210037, "step": 10970 }, { "epoch": 0.72, "learning_rate": 1.1137163995502948e-06, "logits/chosen": -2.5290920734405518, "logits/rejected": -2.2951109409332275, "logps/chosen": -218.7498779296875, "logps/rejected": -193.7891387939453, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.02785148099064827, "rewards/margins": 0.07741155475378036, "rewards/rejected": -0.0495600700378418, "step": 10980 }, { "epoch": 0.72, "learning_rate": 1.1089683761589717e-06, "logits/chosen": -2.1559228897094727, "logits/rejected": -1.998282790184021, "logps/chosen": -235.5149383544922, "logps/rejected": -228.6207275390625, "loss": 0.6884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.04691413417458534, "rewards/margins": 0.12020029127597809, "rewards/rejected": -0.07328616082668304, "step": 10990 }, { "epoch": 0.72, "learning_rate": 1.1042276090700044e-06, "logits/chosen": -2.3137733936309814, "logits/rejected": -2.258112668991089, "logps/chosen": -211.1614227294922, "logps/rejected": -247.2993927001953, "loss": 0.691, "rewards/accuracies": 0.5, "rewards/chosen": -0.008593087084591389, "rewards/margins": 0.05968620628118515, "rewards/rejected": -0.06827928870916367, "step": 11000 }, { "epoch": 0.72, "eval_logits/chosen": -2.312530279159546, "eval_logits/rejected": -2.125136613845825, "eval_logps/chosen": -229.34445190429688, "eval_logps/rejected": -216.98072814941406, "eval_loss": 0.689818263053894, "eval_rewards/accuracies": 0.6395000219345093, "eval_rewards/chosen": 0.026604950428009033, "eval_rewards/margins": 0.08029335737228394, "eval_rewards/rejected": -0.0536884069442749, "eval_runtime": 715.7804, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 11000 }, { "epoch": 0.72, "learning_rate": 1.0994941230135536e-06, "logits/chosen": -2.2787528038024902, "logits/rejected": -1.983764410018921, "logps/chosen": -228.21664428710938, "logps/rejected": -204.8932342529297, "loss": 0.6872, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.04633352905511856, "rewards/margins": 0.12736350297927856, "rewards/rejected": -0.0810299813747406, "step": 11010 }, { "epoch": 0.72, "learning_rate": 1.094767942681804e-06, "logits/chosen": -2.557792901992798, "logits/rejected": -2.1338038444519043, "logps/chosen": -249.3724365234375, "logps/rejected": -218.78439331054688, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.015034383162856102, "rewards/margins": 0.0861252024769783, "rewards/rejected": -0.10115957260131836, "step": 11020 }, { "epoch": 0.72, "learning_rate": 1.0900490927288248e-06, "logits/chosen": -2.0898823738098145, "logits/rejected": -2.044926166534424, "logps/chosen": -258.9222106933594, "logps/rejected": -215.9126739501953, "loss": 0.6904, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.015460757538676262, "rewards/margins": 0.06466784328222275, "rewards/rejected": -0.049207091331481934, "step": 11030 }, { "epoch": 0.72, "learning_rate": 1.0853375977704511e-06, "logits/chosen": -2.3183531761169434, "logits/rejected": -2.1303043365478516, "logps/chosen": -237.7437744140625, "logps/rejected": -181.30435180664062, "loss": 0.6904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.026807749643921852, "rewards/margins": 0.07571287453174591, "rewards/rejected": -0.04890512302517891, "step": 11040 }, { "epoch": 0.72, "learning_rate": 1.0806334823841466e-06, "logits/chosen": -2.1648404598236084, "logits/rejected": -2.268681764602661, "logps/chosen": -239.4142608642578, "logps/rejected": -274.93853759765625, "loss": 0.6903, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.002016544807702303, "rewards/margins": 0.061211831867694855, "rewards/rejected": -0.05919528007507324, "step": 11050 }, { "epoch": 0.72, "learning_rate": 1.0759367711088825e-06, "logits/chosen": -2.175215244293213, "logits/rejected": -2.3136703968048096, "logps/chosen": -180.73880004882812, "logps/rejected": -219.13919067382812, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": 0.030937161296606064, "rewards/margins": 0.049232009798288345, "rewards/rejected": -0.018294844776391983, "step": 11060 }, { "epoch": 0.72, "learning_rate": 1.0712474884450056e-06, "logits/chosen": -2.2868709564208984, "logits/rejected": -2.0856966972351074, "logps/chosen": -200.55062866210938, "logps/rejected": -180.42941284179688, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02282613143324852, "rewards/margins": 0.08665598928928375, "rewards/rejected": -0.06382984668016434, "step": 11070 }, { "epoch": 0.72, "learning_rate": 1.066565658854112e-06, "logits/chosen": -2.2322375774383545, "logits/rejected": -2.195129871368408, "logps/chosen": -127.13566589355469, "logps/rejected": -132.34140014648438, "loss": 0.6895, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.006962375249713659, "rewards/margins": 0.08438241481781006, "rewards/rejected": -0.07742004096508026, "step": 11080 }, { "epoch": 0.73, "learning_rate": 1.0618913067589165e-06, "logits/chosen": -2.3321659564971924, "logits/rejected": -2.115520715713501, "logps/chosen": -213.48275756835938, "logps/rejected": -184.6875457763672, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": 0.0233194287866354, "rewards/margins": 0.08664701133966446, "rewards/rejected": -0.06332757323980331, "step": 11090 }, { "epoch": 0.73, "learning_rate": 1.0572244565431313e-06, "logits/chosen": -2.1922779083251953, "logits/rejected": -2.075507402420044, "logps/chosen": -143.26010131835938, "logps/rejected": -156.20030212402344, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02156703732907772, "rewards/margins": 0.07779018580913544, "rewards/rejected": -0.05622314661741257, "step": 11100 }, { "epoch": 0.73, "eval_logits/chosen": -2.313178300857544, "eval_logits/rejected": -2.1258115768432617, "eval_logps/chosen": -228.88185119628906, "eval_logps/rejected": -216.5213623046875, "eval_loss": 0.6898159384727478, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": 0.031231021508574486, "eval_rewards/margins": 0.08032544702291489, "eval_rewards/rejected": -0.04909442365169525, "eval_runtime": 712.9033, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 11100 }, { "epoch": 0.73, "learning_rate": 1.0525651325513317e-06, "logits/chosen": -2.316305637359619, "logits/rejected": -2.294666290283203, "logps/chosen": -327.4156188964844, "logps/rejected": -310.8594970703125, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03854013979434967, "rewards/margins": 0.04599260538816452, "rewards/rejected": -0.007452460937201977, "step": 11110 }, { "epoch": 0.73, "learning_rate": 1.0479133590888351e-06, "logits/chosen": -2.352234363555908, "logits/rejected": -2.059476613998413, "logps/chosen": -252.9855194091797, "logps/rejected": -235.13720703125, "loss": 0.6886, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.024694135412573814, "rewards/margins": 0.0912385955452919, "rewards/rejected": -0.06654445827007294, "step": 11120 }, { "epoch": 0.73, "learning_rate": 1.0432691604215695e-06, "logits/chosen": -2.231292963027954, "logits/rejected": -2.14487624168396, "logps/chosen": -231.04995727539062, "logps/rejected": -204.88375854492188, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0399702712893486, "rewards/margins": 0.055469810962677, "rewards/rejected": -0.015499535016715527, "step": 11130 }, { "epoch": 0.73, "learning_rate": 1.0386325607759515e-06, "logits/chosen": -2.2429559230804443, "logits/rejected": -2.180182456970215, "logps/chosen": -187.20619201660156, "logps/rejected": -175.9566650390625, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.041839271783828735, "rewards/margins": 0.08820907026529312, "rewards/rejected": -0.04636979475617409, "step": 11140 }, { "epoch": 0.73, "learning_rate": 1.0340035843387544e-06, "logits/chosen": -2.34846830368042, "logits/rejected": -2.021655797958374, "logps/chosen": -177.23959350585938, "logps/rejected": -156.76651000976562, "loss": 0.6902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.024959605187177658, "rewards/margins": 0.07109765708446503, "rewards/rejected": -0.04613804817199707, "step": 11150 }, { "epoch": 0.73, "learning_rate": 1.0293822552569887e-06, "logits/chosen": -2.459784746170044, "logits/rejected": -2.162053108215332, "logps/chosen": -248.28750610351562, "logps/rejected": -202.97152709960938, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.035963498055934906, "rewards/margins": 0.10514490306377411, "rewards/rejected": -0.0691814124584198, "step": 11160 }, { "epoch": 0.73, "learning_rate": 1.0247685976377688e-06, "logits/chosen": -2.2233853340148926, "logits/rejected": -2.050699234008789, "logps/chosen": -179.34494018554688, "logps/rejected": -148.3097686767578, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.023400804027915, "rewards/margins": 0.07796212285757065, "rewards/rejected": -0.0545613169670105, "step": 11170 }, { "epoch": 0.73, "learning_rate": 1.0201626355481939e-06, "logits/chosen": -2.4143099784851074, "logits/rejected": -2.1229655742645264, "logps/chosen": -214.44876098632812, "logps/rejected": -169.19232177734375, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01671757362782955, "rewards/margins": 0.0830346867442131, "rewards/rejected": -0.0663171112537384, "step": 11180 }, { "epoch": 0.73, "learning_rate": 1.0155643930152192e-06, "logits/chosen": -2.4184117317199707, "logits/rejected": -2.33054780960083, "logps/chosen": -270.9144592285156, "logps/rejected": -222.2581787109375, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": 0.016928743571043015, "rewards/margins": 0.059895895421504974, "rewards/rejected": -0.04296715185046196, "step": 11190 }, { "epoch": 0.73, "learning_rate": 1.0109738940255286e-06, "logits/chosen": -2.244631290435791, "logits/rejected": -1.9772279262542725, "logps/chosen": -214.3131561279297, "logps/rejected": -183.52496337890625, "loss": 0.6918, "rewards/accuracies": 0.5, "rewards/chosen": 0.02669384703040123, "rewards/margins": 0.06493046879768372, "rewards/rejected": -0.038236625492572784, "step": 11200 }, { "epoch": 0.73, "eval_logits/chosen": -2.3133840560913086, "eval_logits/rejected": -2.126025438308716, "eval_logps/chosen": -228.95094299316406, "eval_logps/rejected": -216.6020965576172, "eval_loss": 0.6898157000541687, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": 0.03054005466401577, "eval_rewards/margins": 0.08044183254241943, "eval_rewards/rejected": -0.04990177974104881, "eval_runtime": 716.7898, "eval_samples_per_second": 2.79, "eval_steps_per_second": 1.395, "step": 11200 }, { "epoch": 0.73, "learning_rate": 1.0063911625254155e-06, "logits/chosen": -2.3134210109710693, "logits/rejected": -2.174760341644287, "logps/chosen": -229.67391967773438, "logps/rejected": -238.20559692382812, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.06248721480369568, "rewards/margins": 0.0790223479270935, "rewards/rejected": -0.016535133123397827, "step": 11210 }, { "epoch": 0.73, "learning_rate": 1.0018162224206502e-06, "logits/chosen": -2.220797061920166, "logits/rejected": -2.130765438079834, "logps/chosen": -172.0438232421875, "logps/rejected": -187.99996948242188, "loss": 0.6883, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.024309862405061722, "rewards/margins": 0.1224609836935997, "rewards/rejected": -0.09815112501382828, "step": 11220 }, { "epoch": 0.73, "learning_rate": 9.97249097576363e-07, "logits/chosen": -2.467745304107666, "logits/rejected": -2.184107780456543, "logps/chosen": -230.2428741455078, "logps/rejected": -197.06825256347656, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.028137261047959328, "rewards/margins": 0.10317236185073853, "rewards/rejected": -0.07503510266542435, "step": 11230 }, { "epoch": 0.74, "learning_rate": 9.92689811816913e-07, "logits/chosen": -2.336843967437744, "logits/rejected": -2.1020307540893555, "logps/chosen": -213.20925903320312, "logps/rejected": -173.838134765625, "loss": 0.6902, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0068805525079369545, "rewards/margins": 0.08031884580850601, "rewards/rejected": -0.08719939738512039, "step": 11240 }, { "epoch": 0.74, "learning_rate": 9.881383889257691e-07, "logits/chosen": -2.2797598838806152, "logits/rejected": -2.319304943084717, "logps/chosen": -165.97084045410156, "logps/rejected": -236.264404296875, "loss": 0.6898, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.03326042369008064, "rewards/margins": 0.06215088814496994, "rewards/rejected": -0.028890466317534447, "step": 11250 }, { "epoch": 0.74, "learning_rate": 9.835948526453817e-07, "logits/chosen": -2.1445116996765137, "logits/rejected": -2.3151485919952393, "logps/chosen": -179.73304748535156, "logps/rejected": -228.52316284179688, "loss": 0.6916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0069632395170629025, "rewards/margins": 0.05118337273597717, "rewards/rejected": -0.044220130890607834, "step": 11260 }, { "epoch": 0.74, "learning_rate": 9.790592266770633e-07, "logits/chosen": -2.5001702308654785, "logits/rejected": -2.2158002853393555, "logps/chosen": -266.3745422363281, "logps/rejected": -245.5768280029297, "loss": 0.6901, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02970188483595848, "rewards/margins": 0.07441006600856781, "rewards/rejected": -0.044708192348480225, "step": 11270 }, { "epoch": 0.74, "learning_rate": 9.745315346808584e-07, "logits/chosen": -2.18174409866333, "logits/rejected": -2.029189348220825, "logps/chosen": -215.11642456054688, "logps/rejected": -199.79173278808594, "loss": 0.6894, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.039590004831552505, "rewards/margins": 0.07058382779359818, "rewards/rejected": -0.03099382482469082, "step": 11280 }, { "epoch": 0.74, "learning_rate": 9.70011800275428e-07, "logits/chosen": -2.236311674118042, "logits/rejected": -2.1652560234069824, "logps/chosen": -235.71142578125, "logps/rejected": -255.64370727539062, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.018134601414203644, "rewards/margins": 0.08486216515302658, "rewards/rejected": -0.06672756373882294, "step": 11290 }, { "epoch": 0.74, "learning_rate": 9.655000470379206e-07, "logits/chosen": -2.1597790718078613, "logits/rejected": -2.0728516578674316, "logps/chosen": -209.0076446533203, "logps/rejected": -208.43862915039062, "loss": 0.6879, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.013398419134318829, "rewards/margins": 0.10113543272018433, "rewards/rejected": -0.08773700892925262, "step": 11300 }, { "epoch": 0.74, "eval_logits/chosen": -2.315458297729492, "eval_logits/rejected": -2.127800226211548, "eval_logps/chosen": -229.95440673828125, "eval_logps/rejected": -217.7364959716797, "eval_loss": 0.6897902488708496, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": 0.020505422726273537, "eval_rewards/margins": 0.08175148069858551, "eval_rewards/rejected": -0.061246056109666824, "eval_runtime": 712.1423, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 11300 }, { "epoch": 0.74, "learning_rate": 9.609962985038517e-07, "logits/chosen": -2.427191734313965, "logits/rejected": -2.028752088546753, "logps/chosen": -213.2774658203125, "logps/rejected": -200.72789001464844, "loss": 0.6886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.020835842937231064, "rewards/margins": 0.12575358152389526, "rewards/rejected": -0.1049177423119545, "step": 11310 }, { "epoch": 0.74, "learning_rate": 9.565005781669786e-07, "logits/chosen": -2.4781110286712646, "logits/rejected": -2.1154332160949707, "logps/chosen": -257.0857238769531, "logps/rejected": -213.7594451904297, "loss": 0.6879, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0350492037832737, "rewards/margins": 0.09986601769924164, "rewards/rejected": -0.06481683254241943, "step": 11320 }, { "epoch": 0.74, "learning_rate": 9.520129094791822e-07, "logits/chosen": -2.2771661281585693, "logits/rejected": -2.1108384132385254, "logps/chosen": -179.57064819335938, "logps/rejected": -179.90342712402344, "loss": 0.6887, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0007491965079680085, "rewards/margins": 0.1053602546453476, "rewards/rejected": -0.10610946267843246, "step": 11330 }, { "epoch": 0.74, "learning_rate": 9.475333158503389e-07, "logits/chosen": -2.2677841186523438, "logits/rejected": -1.972190499305725, "logps/chosen": -215.33609008789062, "logps/rejected": -175.67623901367188, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.024861928075551987, "rewards/margins": 0.0549871027469635, "rewards/rejected": -0.030125176534056664, "step": 11340 }, { "epoch": 0.74, "learning_rate": 9.430618206482053e-07, "logits/chosen": -2.2450308799743652, "logits/rejected": -2.185279130935669, "logps/chosen": -135.9246063232422, "logps/rejected": -141.6876220703125, "loss": 0.6916, "rewards/accuracies": 0.5, "rewards/chosen": 0.044983211904764175, "rewards/margins": 0.05167509242892265, "rewards/rejected": -0.006691886577755213, "step": 11350 }, { "epoch": 0.74, "learning_rate": 9.385984471982892e-07, "logits/chosen": -2.222777843475342, "logits/rejected": -1.859256386756897, "logps/chosen": -213.132080078125, "logps/rejected": -176.3279571533203, "loss": 0.6865, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.016937825828790665, "rewards/margins": 0.13930463790893555, "rewards/rejected": -0.12236680835485458, "step": 11360 }, { "epoch": 0.74, "learning_rate": 9.341432187837343e-07, "logits/chosen": -2.2822318077087402, "logits/rejected": -2.191162586212158, "logps/chosen": -187.1429443359375, "logps/rejected": -212.93783569335938, "loss": 0.686, "rewards/accuracies": 0.75, "rewards/chosen": 0.03160820156335831, "rewards/margins": 0.10926495492458344, "rewards/rejected": -0.07765677571296692, "step": 11370 }, { "epoch": 0.74, "learning_rate": 9.29696158645193e-07, "logits/chosen": -2.222790241241455, "logits/rejected": -2.32561993598938, "logps/chosen": -218.55014038085938, "logps/rejected": -259.57879638671875, "loss": 0.6882, "rewards/accuracies": 0.75, "rewards/chosen": 0.02552112378180027, "rewards/margins": 0.12038824707269669, "rewards/rejected": -0.09486713260412216, "step": 11380 }, { "epoch": 0.75, "learning_rate": 9.252572899807111e-07, "logits/chosen": -2.2863879203796387, "logits/rejected": -2.305600166320801, "logps/chosen": -268.54376220703125, "logps/rejected": -253.93896484375, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.020558223128318787, "rewards/margins": 0.11582138389348984, "rewards/rejected": -0.09526316076517105, "step": 11390 }, { "epoch": 0.75, "learning_rate": 9.208266359456003e-07, "logits/chosen": -2.3670859336853027, "logits/rejected": -2.106987237930298, "logps/chosen": -202.4696807861328, "logps/rejected": -219.4122772216797, "loss": 0.6896, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.03336096554994583, "rewards/margins": 0.07371880859136581, "rewards/rejected": -0.04035785049200058, "step": 11400 }, { "epoch": 0.75, "eval_logits/chosen": -2.31719708442688, "eval_logits/rejected": -2.1292054653167725, "eval_logps/chosen": -230.30577087402344, "eval_logps/rejected": -218.55361938476562, "eval_loss": 0.6897911429405212, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": 0.01699184998869896, "eval_rewards/margins": 0.08640897274017334, "eval_rewards/rejected": -0.06941711902618408, "eval_runtime": 713.2491, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 11400 }, { "epoch": 0.75, "learning_rate": 9.164042196523229e-07, "logits/chosen": -2.4905319213867188, "logits/rejected": -2.1854054927825928, "logps/chosen": -187.57745361328125, "logps/rejected": -185.0448455810547, "loss": 0.69, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.018236028030514717, "rewards/margins": 0.1098506823182106, "rewards/rejected": -0.09161464869976044, "step": 11410 }, { "epoch": 0.75, "learning_rate": 9.119900641703696e-07, "logits/chosen": -2.4532532691955566, "logits/rejected": -2.1825637817382812, "logps/chosen": -211.77310180664062, "logps/rejected": -174.50393676757812, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.016732508316636086, "rewards/margins": 0.09065760672092438, "rewards/rejected": -0.07392510026693344, "step": 11420 }, { "epoch": 0.75, "learning_rate": 9.075841925261364e-07, "logits/chosen": -2.5480093955993652, "logits/rejected": -2.3063910007476807, "logps/chosen": -235.31533813476562, "logps/rejected": -228.7047882080078, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": 0.026038330048322678, "rewards/margins": 0.0781828910112381, "rewards/rejected": -0.05214455723762512, "step": 11430 }, { "epoch": 0.75, "learning_rate": 9.031866277028093e-07, "logits/chosen": -2.2410953044891357, "logits/rejected": -2.2712674140930176, "logps/chosen": -190.3566436767578, "logps/rejected": -225.5463104248047, "loss": 0.6889, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.007181202061474323, "rewards/margins": 0.08380020409822464, "rewards/rejected": -0.07661899924278259, "step": 11440 }, { "epoch": 0.75, "learning_rate": 8.987973926402391e-07, "logits/chosen": -2.2060625553131104, "logits/rejected": -2.242389440536499, "logps/chosen": -209.5387420654297, "logps/rejected": -218.03573608398438, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": 0.028008287772536278, "rewards/margins": 0.09084399789571762, "rewards/rejected": -0.0628357082605362, "step": 11450 }, { "epoch": 0.75, "learning_rate": 8.944165102348273e-07, "logits/chosen": -2.419955015182495, "logits/rejected": -2.27038311958313, "logps/chosen": -153.3720703125, "logps/rejected": -171.96742248535156, "loss": 0.6879, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0357503779232502, "rewards/margins": 0.11337963491678238, "rewards/rejected": -0.07762926071882248, "step": 11460 }, { "epoch": 0.75, "learning_rate": 8.900440033394018e-07, "logits/chosen": -2.2393195629119873, "logits/rejected": -2.2905590534210205, "logps/chosen": -179.89437866210938, "logps/rejected": -177.8740692138672, "loss": 0.6904, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.02621867135167122, "rewards/margins": 0.07265409827232361, "rewards/rejected": -0.04643542319536209, "step": 11470 }, { "epoch": 0.75, "learning_rate": 8.856798947631009e-07, "logits/chosen": -2.263493537902832, "logits/rejected": -2.2867515087127686, "logps/chosen": -187.29135131835938, "logps/rejected": -215.5435791015625, "loss": 0.6878, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03584372624754906, "rewards/margins": 0.11300251632928848, "rewards/rejected": -0.07715878635644913, "step": 11480 }, { "epoch": 0.75, "learning_rate": 8.813242072712519e-07, "logits/chosen": -2.0134589672088623, "logits/rejected": -1.897157073020935, "logps/chosen": -162.03453063964844, "logps/rejected": -171.01966857910156, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -0.001722379820421338, "rewards/margins": 0.0784909725189209, "rewards/rejected": -0.08021334558725357, "step": 11490 }, { "epoch": 0.75, "learning_rate": 8.769769635852557e-07, "logits/chosen": -2.2438220977783203, "logits/rejected": -2.300549268722534, "logps/chosen": -213.259521484375, "logps/rejected": -182.95872497558594, "loss": 0.6904, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.020482342690229416, "rewards/margins": 0.06456250697374344, "rewards/rejected": -0.04408016428351402, "step": 11500 }, { "epoch": 0.75, "eval_logits/chosen": -2.3182504177093506, "eval_logits/rejected": -2.1302804946899414, "eval_logps/chosen": -230.00030517578125, "eval_logps/rejected": -217.7165069580078, "eval_loss": 0.6898082494735718, "eval_rewards/accuracies": 0.6294999718666077, "eval_rewards/chosen": 0.02004634030163288, "eval_rewards/margins": 0.08109237998723984, "eval_rewards/rejected": -0.06104603409767151, "eval_runtime": 713.4185, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 11500 }, { "epoch": 0.75, "learning_rate": 8.726381863824635e-07, "logits/chosen": -2.4795243740081787, "logits/rejected": -2.1508588790893555, "logps/chosen": -280.99859619140625, "logps/rejected": -221.33462524414062, "loss": 0.6901, "rewards/accuracies": 0.625, "rewards/chosen": 0.04078471660614014, "rewards/margins": 0.0774230808019638, "rewards/rejected": -0.036638353019952774, "step": 11510 }, { "epoch": 0.75, "learning_rate": 8.683078982960638e-07, "logits/chosen": -2.23110294342041, "logits/rejected": -1.9223436117172241, "logps/chosen": -224.91244506835938, "logps/rejected": -185.7821044921875, "loss": 0.6873, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0022748790215700865, "rewards/margins": 0.1096058264374733, "rewards/rejected": -0.11188069730997086, "step": 11520 }, { "epoch": 0.75, "learning_rate": 8.639861219149584e-07, "logits/chosen": -2.0781750679016113, "logits/rejected": -2.1381192207336426, "logps/chosen": -263.2950134277344, "logps/rejected": -229.32223510742188, "loss": 0.6875, "rewards/accuracies": 0.625, "rewards/chosen": 0.006869921926409006, "rewards/margins": 0.08951739221811295, "rewards/rejected": -0.08264746516942978, "step": 11530 }, { "epoch": 0.76, "learning_rate": 8.596728797836532e-07, "logits/chosen": -2.211719274520874, "logits/rejected": -2.0574100017547607, "logps/chosen": -212.7632293701172, "logps/rejected": -255.2969512939453, "loss": 0.6865, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.025454815477132797, "rewards/margins": 0.12940728664398193, "rewards/rejected": -0.10395244508981705, "step": 11540 }, { "epoch": 0.76, "learning_rate": 8.553681944021294e-07, "logits/chosen": -2.264085054397583, "logits/rejected": -2.2858448028564453, "logps/chosen": -237.9873809814453, "logps/rejected": -227.55477905273438, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": 0.010966275818645954, "rewards/margins": 0.08914720267057419, "rewards/rejected": -0.07818093150854111, "step": 11550 }, { "epoch": 0.76, "learning_rate": 8.510720882257365e-07, "logits/chosen": -2.041898727416992, "logits/rejected": -2.1856493949890137, "logps/chosen": -158.22018432617188, "logps/rejected": -210.814208984375, "loss": 0.6862, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.030803903937339783, "rewards/margins": 0.1136208325624466, "rewards/rejected": -0.08281692862510681, "step": 11560 }, { "epoch": 0.76, "learning_rate": 8.467845836650667e-07, "logits/chosen": -1.929513692855835, "logits/rejected": -1.977299451828003, "logps/chosen": -207.1142578125, "logps/rejected": -222.85317993164062, "loss": 0.6872, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.011523036286234856, "rewards/margins": 0.09183812886476517, "rewards/rejected": -0.08031509816646576, "step": 11570 }, { "epoch": 0.76, "learning_rate": 8.425057030858461e-07, "logits/chosen": -2.1237521171569824, "logits/rejected": -1.9577020406723022, "logps/chosen": -155.14407348632812, "logps/rejected": -192.67381286621094, "loss": 0.6894, "rewards/accuracies": 0.5, "rewards/chosen": 0.009118120186030865, "rewards/margins": 0.08506849408149719, "rewards/rejected": -0.07595036923885345, "step": 11580 }, { "epoch": 0.76, "learning_rate": 8.382354688088098e-07, "logits/chosen": -2.2753641605377197, "logits/rejected": -2.135500192642212, "logps/chosen": -157.50479125976562, "logps/rejected": -181.17617797851562, "loss": 0.69, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006505739875137806, "rewards/margins": 0.08248453587293625, "rewards/rejected": -0.07597880065441132, "step": 11590 }, { "epoch": 0.76, "learning_rate": 8.33973903109594e-07, "logits/chosen": -2.396981954574585, "logits/rejected": -2.1408324241638184, "logps/chosen": -216.294677734375, "logps/rejected": -194.80892944335938, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0030242991633713245, "rewards/margins": 0.10075845569372177, "rewards/rejected": -0.09773416072130203, "step": 11600 }, { "epoch": 0.76, "eval_logits/chosen": -2.314687967300415, "eval_logits/rejected": -2.126920223236084, "eval_logps/chosen": -231.07017517089844, "eval_logps/rejected": -219.44680786132812, "eval_loss": 0.6898018717765808, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": 0.009347718209028244, "eval_rewards/margins": 0.08769676089286804, "eval_rewards/rejected": -0.0783490464091301, "eval_runtime": 711.5416, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 11600 }, { "epoch": 0.76, "learning_rate": 8.297210282186102e-07, "logits/chosen": -2.1594557762145996, "logits/rejected": -2.1344146728515625, "logps/chosen": -232.7439727783203, "logps/rejected": -261.56768798828125, "loss": 0.6891, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.027264848351478577, "rewards/margins": 0.07457095384597778, "rewards/rejected": -0.10183580219745636, "step": 11610 }, { "epoch": 0.76, "learning_rate": 8.254768663209397e-07, "logits/chosen": -2.260918378829956, "logits/rejected": -2.054774522781372, "logps/chosen": -277.40802001953125, "logps/rejected": -221.30020141601562, "loss": 0.6911, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01171756163239479, "rewards/margins": 0.0636371374130249, "rewards/rejected": -0.05191957205533981, "step": 11620 }, { "epoch": 0.76, "learning_rate": 8.212414395562079e-07, "logits/chosen": -2.1018333435058594, "logits/rejected": -2.234898090362549, "logps/chosen": -231.6614990234375, "logps/rejected": -261.87396240234375, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -0.014473098330199718, "rewards/margins": 0.06291624903678894, "rewards/rejected": -0.07738934457302094, "step": 11630 }, { "epoch": 0.76, "learning_rate": 8.170147700184775e-07, "logits/chosen": -2.3055419921875, "logits/rejected": -2.1873691082000732, "logps/chosen": -254.23110961914062, "logps/rejected": -254.5798797607422, "loss": 0.6906, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.022202350199222565, "rewards/margins": 0.09281570464372635, "rewards/rejected": -0.07061335444450378, "step": 11640 }, { "epoch": 0.76, "learning_rate": 8.127968797561242e-07, "logits/chosen": -2.353506565093994, "logits/rejected": -2.0914809703826904, "logps/chosen": -223.8273162841797, "logps/rejected": -212.8360595703125, "loss": 0.6888, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0014341063797473907, "rewards/margins": 0.11431723833084106, "rewards/rejected": -0.11288313567638397, "step": 11650 }, { "epoch": 0.76, "learning_rate": 8.085877907717338e-07, "logits/chosen": -2.246596574783325, "logits/rejected": -2.2063441276550293, "logps/chosen": -219.5164794921875, "logps/rejected": -218.2258758544922, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.013950645923614502, "rewards/margins": 0.10388661921024323, "rewards/rejected": -0.08993595838546753, "step": 11660 }, { "epoch": 0.76, "learning_rate": 8.043875250219732e-07, "logits/chosen": -2.2046780586242676, "logits/rejected": -2.141763687133789, "logps/chosen": -231.0264434814453, "logps/rejected": -213.81973266601562, "loss": 0.6909, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.016217241063714027, "rewards/margins": 0.03862147778272629, "rewards/rejected": -0.054838716983795166, "step": 11670 }, { "epoch": 0.76, "learning_rate": 8.001961044174881e-07, "logits/chosen": -2.3708043098449707, "logits/rejected": -2.1657023429870605, "logps/chosen": -230.3643341064453, "logps/rejected": -176.6756591796875, "loss": 0.6937, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.01899898424744606, "rewards/margins": 0.03499449044466019, "rewards/rejected": -0.053993482142686844, "step": 11680 }, { "epoch": 0.76, "learning_rate": 7.960135508227795e-07, "logits/chosen": -2.3832452297210693, "logits/rejected": -2.029789686203003, "logps/chosen": -294.03143310546875, "logps/rejected": -236.58212280273438, "loss": 0.691, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.001624174416065216, "rewards/margins": 0.06866665184497833, "rewards/rejected": -0.07029082626104355, "step": 11690 }, { "epoch": 0.77, "learning_rate": 7.91839886056098e-07, "logits/chosen": -2.3958792686462402, "logits/rejected": -2.139774799346924, "logps/chosen": -285.03851318359375, "logps/rejected": -261.7726135253906, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.012762362137436867, "rewards/margins": 0.07304862141609192, "rewards/rejected": -0.08581098169088364, "step": 11700 }, { "epoch": 0.77, "eval_logits/chosen": -2.31754469871521, "eval_logits/rejected": -2.1296095848083496, "eval_logps/chosen": -231.76707458496094, "eval_logps/rejected": -219.65855407714844, "eval_loss": 0.6898021697998047, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": 0.002378788311034441, "eval_rewards/margins": 0.08284525573253632, "eval_rewards/rejected": -0.08046647161245346, "eval_runtime": 714.8864, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 11700 }, { "epoch": 0.77, "learning_rate": 7.876751318893217e-07, "logits/chosen": -2.2193684577941895, "logits/rejected": -1.9712486267089844, "logps/chosen": -236.8195343017578, "logps/rejected": -224.7071533203125, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.010461434721946716, "rewards/margins": 0.09475782513618469, "rewards/rejected": -0.08429639786481857, "step": 11710 }, { "epoch": 0.77, "learning_rate": 7.8351931004785e-07, "logits/chosen": -2.2037353515625, "logits/rejected": -1.8471559286117554, "logps/chosen": -207.496337890625, "logps/rejected": -191.63697814941406, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": 0.011319467797875404, "rewards/margins": 0.09023983776569366, "rewards/rejected": -0.07892037183046341, "step": 11720 }, { "epoch": 0.77, "learning_rate": 7.793724422104834e-07, "logits/chosen": -2.0594654083251953, "logits/rejected": -2.2132391929626465, "logps/chosen": -208.775146484375, "logps/rejected": -288.7984924316406, "loss": 0.6889, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0004916332545690238, "rewards/margins": 0.10546108335256577, "rewards/rejected": -0.10496945679187775, "step": 11730 }, { "epoch": 0.77, "learning_rate": 7.752345500093184e-07, "logits/chosen": -2.360964059829712, "logits/rejected": -2.3283915519714355, "logps/chosen": -213.69003295898438, "logps/rejected": -188.79702758789062, "loss": 0.6907, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.03534995764493942, "rewards/margins": 0.048714593052864075, "rewards/rejected": -0.0840645581483841, "step": 11740 }, { "epoch": 0.77, "learning_rate": 7.711056550296253e-07, "logits/chosen": -2.4189441204071045, "logits/rejected": -2.2487740516662598, "logps/chosen": -245.80990600585938, "logps/rejected": -219.0479736328125, "loss": 0.6919, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.009839094243943691, "rewards/margins": 0.10263122618198395, "rewards/rejected": -0.09279213845729828, "step": 11750 }, { "epoch": 0.77, "learning_rate": 7.669857788097445e-07, "logits/chosen": -2.1502368450164795, "logits/rejected": -1.9063570499420166, "logps/chosen": -164.16265869140625, "logps/rejected": -200.6251678466797, "loss": 0.6881, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.025966918095946312, "rewards/margins": 0.09837634116411209, "rewards/rejected": -0.12434325367212296, "step": 11760 }, { "epoch": 0.77, "learning_rate": 7.628749428409676e-07, "logits/chosen": -2.410966157913208, "logits/rejected": -1.9934518337249756, "logps/chosen": -239.00985717773438, "logps/rejected": -188.3817138671875, "loss": 0.6898, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.02122049406170845, "rewards/margins": 0.08123396337032318, "rewards/rejected": -0.10245446860790253, "step": 11770 }, { "epoch": 0.77, "learning_rate": 7.587731685674288e-07, "logits/chosen": -2.293890953063965, "logits/rejected": -2.332033157348633, "logps/chosen": -272.982666015625, "logps/rejected": -288.7724304199219, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.006056091282516718, "rewards/margins": 0.07952813804149628, "rewards/rejected": -0.0734720379114151, "step": 11780 }, { "epoch": 0.77, "learning_rate": 7.546804773859931e-07, "logits/chosen": -2.392029047012329, "logits/rejected": -2.1629507541656494, "logps/chosen": -228.03903198242188, "logps/rejected": -219.1748809814453, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.009989907965064049, "rewards/margins": 0.11071814596652985, "rewards/rejected": -0.12070806324481964, "step": 11790 }, { "epoch": 0.77, "learning_rate": 7.505968906461409e-07, "logits/chosen": -2.295881509780884, "logits/rejected": -2.148301362991333, "logps/chosen": -243.2596893310547, "logps/rejected": -226.50534057617188, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.023195995017886162, "rewards/margins": 0.07391957193613052, "rewards/rejected": -0.09711556136608124, "step": 11800 }, { "epoch": 0.77, "eval_logits/chosen": -2.3191685676574707, "eval_logits/rejected": -2.1311001777648926, "eval_logps/chosen": -232.53021240234375, "eval_logps/rejected": -220.31980895996094, "eval_loss": 0.6897976398468018, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.005252572242170572, "eval_rewards/margins": 0.08182655274868011, "eval_rewards/rejected": -0.08707911521196365, "eval_runtime": 712.4035, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 11800 }, { "epoch": 0.77, "learning_rate": 7.465224296498627e-07, "logits/chosen": -2.3691840171813965, "logits/rejected": -1.9838594198226929, "logps/chosen": -233.9862823486328, "logps/rejected": -198.72694396972656, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.010271742939949036, "rewards/margins": 0.058696817606687546, "rewards/rejected": -0.06896857172250748, "step": 11810 }, { "epoch": 0.77, "learning_rate": 7.424571156515412e-07, "logits/chosen": -2.234841823577881, "logits/rejected": -2.2243666648864746, "logps/chosen": -177.17324829101562, "logps/rejected": -210.3824005126953, "loss": 0.6912, "rewards/accuracies": 0.625, "rewards/chosen": 0.005319344811141491, "rewards/margins": 0.10532574355602264, "rewards/rejected": -0.10000641644001007, "step": 11820 }, { "epoch": 0.77, "learning_rate": 7.38400969857847e-07, "logits/chosen": -2.183497905731201, "logits/rejected": -1.9732284545898438, "logps/chosen": -189.73611450195312, "logps/rejected": -206.5034637451172, "loss": 0.6851, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06593702733516693, "rewards/margins": 0.13615167140960693, "rewards/rejected": -0.20208871364593506, "step": 11830 }, { "epoch": 0.77, "learning_rate": 7.343540134276225e-07, "logits/chosen": -2.3149163722991943, "logits/rejected": -2.23742938041687, "logps/chosen": -169.7374267578125, "logps/rejected": -179.54759216308594, "loss": 0.6905, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.009741841815412045, "rewards/margins": 0.0791388601064682, "rewards/rejected": -0.06939703226089478, "step": 11840 }, { "epoch": 0.78, "learning_rate": 7.303162674717762e-07, "logits/chosen": -2.2864696979522705, "logits/rejected": -1.8954169750213623, "logps/chosen": -213.3994140625, "logps/rejected": -165.81307983398438, "loss": 0.6889, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03171641379594803, "rewards/margins": 0.08249086141586304, "rewards/rejected": -0.11420726776123047, "step": 11850 }, { "epoch": 0.78, "learning_rate": 7.26287753053167e-07, "logits/chosen": -2.250080108642578, "logits/rejected": -2.17789888381958, "logps/chosen": -267.2779846191406, "logps/rejected": -273.6764831542969, "loss": 0.6886, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.023018458858132362, "rewards/margins": 0.0766671746969223, "rewards/rejected": -0.09968564659357071, "step": 11860 }, { "epoch": 0.78, "learning_rate": 7.222684911865013e-07, "logits/chosen": -2.3542191982269287, "logits/rejected": -2.364485263824463, "logps/chosen": -207.5823516845703, "logps/rejected": -240.3794708251953, "loss": 0.6875, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0041365777142345905, "rewards/margins": 0.11111694574356079, "rewards/rejected": -0.1152535229921341, "step": 11870 }, { "epoch": 0.78, "learning_rate": 7.182585028382166e-07, "logits/chosen": -2.407355785369873, "logits/rejected": -2.1081418991088867, "logps/chosen": -276.42279052734375, "logps/rejected": -252.2656707763672, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.0027120530139654875, "rewards/margins": 0.08292824774980545, "rewards/rejected": -0.08564029633998871, "step": 11880 }, { "epoch": 0.78, "learning_rate": 7.142578089263769e-07, "logits/chosen": -2.438586473464966, "logits/rejected": -2.1107470989227295, "logps/chosen": -319.8658447265625, "logps/rejected": -268.9220886230469, "loss": 0.6916, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.014372703619301319, "rewards/margins": 0.07856186479330063, "rewards/rejected": -0.09293456375598907, "step": 11890 }, { "epoch": 0.78, "learning_rate": 7.102664303205611e-07, "logits/chosen": -2.3172056674957275, "logits/rejected": -2.0737314224243164, "logps/chosen": -221.40579223632812, "logps/rejected": -211.95523071289062, "loss": 0.6871, "rewards/accuracies": 0.625, "rewards/chosen": -0.008432546630501747, "rewards/margins": 0.08426036685705185, "rewards/rejected": -0.09269289672374725, "step": 11900 }, { "epoch": 0.78, "eval_logits/chosen": -2.3179848194122314, "eval_logits/rejected": -2.130025625228882, "eval_logps/chosen": -232.76321411132812, "eval_logps/rejected": -220.74920654296875, "eval_loss": 0.6897937059402466, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.007582689169794321, "eval_rewards/margins": 0.08379034698009491, "eval_rewards/rejected": -0.09137304127216339, "eval_runtime": 712.893, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 11900 }, { "epoch": 0.78, "learning_rate": 7.062843878417566e-07, "logits/chosen": -2.4349122047424316, "logits/rejected": -2.2990574836730957, "logps/chosen": -222.23208618164062, "logps/rejected": -196.22946166992188, "loss": 0.6889, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.012005344964563847, "rewards/margins": 0.07041595876216888, "rewards/rejected": -0.05841060355305672, "step": 11910 }, { "epoch": 0.78, "learning_rate": 7.023117022622458e-07, "logits/chosen": -2.355516195297241, "logits/rejected": -2.0010008811950684, "logps/chosen": -242.761962890625, "logps/rejected": -227.82077026367188, "loss": 0.6904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04246622696518898, "rewards/margins": 0.0832718163728714, "rewards/rejected": -0.12573805451393127, "step": 11920 }, { "epoch": 0.78, "learning_rate": 6.983483943055042e-07, "logits/chosen": -2.251622200012207, "logits/rejected": -2.0690550804138184, "logps/chosen": -281.15789794921875, "logps/rejected": -231.5454864501953, "loss": 0.6889, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.006913202814757824, "rewards/margins": 0.07160593569278717, "rewards/rejected": -0.07851915061473846, "step": 11930 }, { "epoch": 0.78, "learning_rate": 6.943944846460859e-07, "logits/chosen": -2.2984964847564697, "logits/rejected": -2.2344911098480225, "logps/chosen": -218.5690460205078, "logps/rejected": -177.935791015625, "loss": 0.6924, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.001852800720371306, "rewards/margins": 0.06108871102333069, "rewards/rejected": -0.059235911816358566, "step": 11940 }, { "epoch": 0.78, "learning_rate": 6.904499939095225e-07, "logits/chosen": -2.264219045639038, "logits/rejected": -2.2117581367492676, "logps/chosen": -222.3902587890625, "logps/rejected": -217.29684448242188, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": -0.00043100136099383235, "rewards/margins": 0.1023484319448471, "rewards/rejected": -0.10277943313121796, "step": 11950 }, { "epoch": 0.78, "learning_rate": 6.865149426722079e-07, "logits/chosen": -2.233142852783203, "logits/rejected": -2.1767783164978027, "logps/chosen": -274.33160400390625, "logps/rejected": -252.09359741210938, "loss": 0.6901, "rewards/accuracies": 0.625, "rewards/chosen": -0.03146423026919365, "rewards/margins": 0.0845610499382019, "rewards/rejected": -0.11602529138326645, "step": 11960 }, { "epoch": 0.78, "learning_rate": 6.825893514612985e-07, "logits/chosen": -2.071587324142456, "logits/rejected": -2.1735153198242188, "logps/chosen": -236.6058807373047, "logps/rejected": -243.20828247070312, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0003904670593328774, "rewards/margins": 0.08785964548587799, "rewards/rejected": -0.08825010061264038, "step": 11970 }, { "epoch": 0.78, "learning_rate": 6.786732407546001e-07, "logits/chosen": -2.084094285964966, "logits/rejected": -2.0345733165740967, "logps/chosen": -201.45138549804688, "logps/rejected": -169.8759765625, "loss": 0.6893, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.005701950751245022, "rewards/margins": 0.08140133321285248, "rewards/rejected": -0.08710329234600067, "step": 11980 }, { "epoch": 0.78, "learning_rate": 6.747666309804654e-07, "logits/chosen": -2.4778218269348145, "logits/rejected": -2.1540145874023438, "logps/chosen": -287.5825500488281, "logps/rejected": -215.3527374267578, "loss": 0.6906, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.0009467907366342843, "rewards/margins": 0.07305634021759033, "rewards/rejected": -0.07210955023765564, "step": 11990 }, { "epoch": 0.79, "learning_rate": 6.708695425176831e-07, "logits/chosen": -2.1013684272766113, "logits/rejected": -2.102804660797119, "logps/chosen": -170.0596466064453, "logps/rejected": -200.979248046875, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.010208925232291222, "rewards/margins": 0.1016424298286438, "rewards/rejected": -0.11185135692358017, "step": 12000 }, { "epoch": 0.79, "eval_logits/chosen": -2.3212053775787354, "eval_logits/rejected": -2.132889986038208, "eval_logps/chosen": -232.20034790039062, "eval_logps/rejected": -220.30197143554688, "eval_loss": 0.6897847652435303, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -0.001953852828592062, "eval_rewards/margins": 0.08494684100151062, "eval_rewards/rejected": -0.08690068125724792, "eval_runtime": 712.847, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 12000 }, { "epoch": 0.79, "learning_rate": 6.669819956953768e-07, "logits/chosen": -2.1582138538360596, "logits/rejected": -2.0927162170410156, "logps/chosen": -170.88861083984375, "logps/rejected": -186.5122833251953, "loss": 0.6899, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.005573832895606756, "rewards/margins": 0.07083684206008911, "rewards/rejected": -0.07641066610813141, "step": 12010 }, { "epoch": 0.79, "learning_rate": 6.631040107928957e-07, "logits/chosen": -2.4988465309143066, "logits/rejected": -2.1730918884277344, "logps/chosen": -268.5477294921875, "logps/rejected": -180.80491638183594, "loss": 0.691, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0036344178952276707, "rewards/margins": 0.08555683493614197, "rewards/rejected": -0.0891912430524826, "step": 12020 }, { "epoch": 0.79, "learning_rate": 6.592356080397072e-07, "logits/chosen": -2.393764019012451, "logits/rejected": -1.8318722248077393, "logps/chosen": -227.7593994140625, "logps/rejected": -179.75608825683594, "loss": 0.6901, "rewards/accuracies": 0.625, "rewards/chosen": 0.0052979374304413795, "rewards/margins": 0.08460094779729843, "rewards/rejected": -0.07930301129817963, "step": 12030 }, { "epoch": 0.79, "learning_rate": 6.553768076152963e-07, "logits/chosen": -2.2826318740844727, "logits/rejected": -2.3857228755950928, "logps/chosen": -157.96484375, "logps/rejected": -197.0670928955078, "loss": 0.6889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0013273532968014479, "rewards/margins": 0.11998225748538971, "rewards/rejected": -0.11865489184856415, "step": 12040 }, { "epoch": 0.79, "learning_rate": 6.51527629649055e-07, "logits/chosen": -2.419004201889038, "logits/rejected": -2.2709298133850098, "logps/chosen": -254.87680053710938, "logps/rejected": -231.243408203125, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.012937125749886036, "rewards/margins": 0.05513680726289749, "rewards/rejected": -0.06807393580675125, "step": 12050 }, { "epoch": 0.79, "learning_rate": 6.476880942201824e-07, "logits/chosen": -2.5580201148986816, "logits/rejected": -2.1555469036102295, "logps/chosen": -238.6651611328125, "logps/rejected": -189.90174865722656, "loss": 0.688, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.026725223287940025, "rewards/margins": 0.09860799461603165, "rewards/rejected": -0.07188276946544647, "step": 12060 }, { "epoch": 0.79, "learning_rate": 6.438582213575748e-07, "logits/chosen": -2.245471477508545, "logits/rejected": -2.217454195022583, "logps/chosen": -229.4067840576172, "logps/rejected": -247.80722045898438, "loss": 0.6912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005097637884318829, "rewards/margins": 0.0725497156381607, "rewards/rejected": -0.0674520879983902, "step": 12070 }, { "epoch": 0.79, "learning_rate": 6.400380310397267e-07, "logits/chosen": -2.1843056678771973, "logits/rejected": -2.1890132427215576, "logps/chosen": -232.0767822265625, "logps/rejected": -268.4687805175781, "loss": 0.6922, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.00033568107755854726, "rewards/margins": 0.04980158433318138, "rewards/rejected": -0.04946590214967728, "step": 12080 }, { "epoch": 0.79, "learning_rate": 6.362275431946202e-07, "logits/chosen": -2.168041467666626, "logits/rejected": -2.201719284057617, "logps/chosen": -235.95803833007812, "logps/rejected": -248.0696563720703, "loss": 0.6896, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.003955559339374304, "rewards/margins": 0.06063423305749893, "rewards/rejected": -0.05667867138981819, "step": 12090 }, { "epoch": 0.79, "learning_rate": 6.324267776996285e-07, "logits/chosen": -2.389530658721924, "logits/rejected": -2.0050384998321533, "logps/chosen": -375.6055908203125, "logps/rejected": -265.6221618652344, "loss": 0.6881, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0102651696652174, "rewards/margins": 0.11997060477733612, "rewards/rejected": -0.13023580610752106, "step": 12100 }, { "epoch": 0.79, "eval_logits/chosen": -2.3229787349700928, "eval_logits/rejected": -2.134584426879883, "eval_logps/chosen": -231.93675231933594, "eval_logps/rejected": -219.7613525390625, "eval_loss": 0.6897767782211304, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": 0.0006819861009716988, "eval_rewards/margins": 0.0821765884757042, "eval_rewards/rejected": -0.08149460703134537, "eval_runtime": 714.3235, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 12100 }, { "epoch": 0.79, "learning_rate": 6.286357543814045e-07, "logits/chosen": -2.2420029640197754, "logits/rejected": -2.154069662094116, "logps/chosen": -193.7761993408203, "logps/rejected": -272.2079772949219, "loss": 0.6872, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.002463629934936762, "rewards/margins": 0.10420586168766022, "rewards/rejected": -0.10666950047016144, "step": 12110 }, { "epoch": 0.79, "learning_rate": 6.248544930157838e-07, "logits/chosen": -2.3656675815582275, "logits/rejected": -2.122084617614746, "logps/chosen": -183.5366973876953, "logps/rejected": -178.4772186279297, "loss": 0.6866, "rewards/accuracies": 0.75, "rewards/chosen": 0.005488743539899588, "rewards/margins": 0.12721005082130432, "rewards/rejected": -0.12172132730484009, "step": 12120 }, { "epoch": 0.79, "learning_rate": 6.21083013327678e-07, "logits/chosen": -2.3051021099090576, "logits/rejected": -2.202974796295166, "logps/chosen": -300.95269775390625, "logps/rejected": -251.5149383544922, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.032880254089832306, "rewards/margins": 0.06488000601530075, "rewards/rejected": -0.03199975565075874, "step": 12130 }, { "epoch": 0.79, "learning_rate": 6.17321334990973e-07, "logits/chosen": -2.2721495628356934, "logits/rejected": -2.188018798828125, "logps/chosen": -200.94345092773438, "logps/rejected": -172.30050659179688, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.0013894874136894941, "rewards/margins": 0.06594385951757431, "rewards/rejected": -0.06733334064483643, "step": 12140 }, { "epoch": 0.79, "learning_rate": 6.135694776284243e-07, "logits/chosen": -2.431753635406494, "logits/rejected": -2.160250425338745, "logps/chosen": -269.80584716796875, "logps/rejected": -216.8905487060547, "loss": 0.6889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.009851393289864063, "rewards/margins": 0.11856885999441147, "rewards/rejected": -0.10871747881174088, "step": 12150 }, { "epoch": 0.8, "learning_rate": 6.098274608115595e-07, "logits/chosen": -2.2147669792175293, "logits/rejected": -2.0826354026794434, "logps/chosen": -204.7724151611328, "logps/rejected": -179.89138793945312, "loss": 0.693, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.008782130666077137, "rewards/margins": 0.037513960152864456, "rewards/rejected": -0.028731834143400192, "step": 12160 }, { "epoch": 0.8, "learning_rate": 6.060953040605697e-07, "logits/chosen": -2.4421582221984863, "logits/rejected": -1.913739562034607, "logps/chosen": -334.14593505859375, "logps/rejected": -275.56756591796875, "loss": 0.6909, "rewards/accuracies": 0.75, "rewards/chosen": 0.04983791708946228, "rewards/margins": 0.11316549777984619, "rewards/rejected": -0.0633275955915451, "step": 12170 }, { "epoch": 0.8, "learning_rate": 6.023730268442144e-07, "logits/chosen": -2.23698091506958, "logits/rejected": -2.057175636291504, "logps/chosen": -202.0666046142578, "logps/rejected": -185.1787109375, "loss": 0.6871, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.027542103081941605, "rewards/margins": 0.11556919664144516, "rewards/rejected": -0.08802708983421326, "step": 12180 }, { "epoch": 0.8, "learning_rate": 5.986606485797131e-07, "logits/chosen": -2.234809160232544, "logits/rejected": -2.023869752883911, "logps/chosen": -199.58416748046875, "logps/rejected": -211.5013885498047, "loss": 0.6893, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.003064130200073123, "rewards/margins": 0.06851175427436829, "rewards/rejected": -0.0654476210474968, "step": 12190 }, { "epoch": 0.8, "learning_rate": 5.949581886326511e-07, "logits/chosen": -2.352273941040039, "logits/rejected": -2.33347749710083, "logps/chosen": -295.5454406738281, "logps/rejected": -271.19891357421875, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.022466326132416725, "rewards/margins": 0.0456775538623333, "rewards/rejected": -0.023211227729916573, "step": 12200 }, { "epoch": 0.8, "eval_logits/chosen": -2.3217251300811768, "eval_logits/rejected": -2.1334657669067383, "eval_logps/chosen": -230.84371948242188, "eval_logps/rejected": -218.59002685546875, "eval_loss": 0.6897699236869812, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": 0.011612382717430592, "eval_rewards/margins": 0.08139365911483765, "eval_rewards/rejected": -0.06978128105401993, "eval_runtime": 713.062, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 12200 }, { "epoch": 0.8, "learning_rate": 5.912656663168717e-07, "logits/chosen": -2.4126431941986084, "logits/rejected": -2.3553099632263184, "logps/chosen": -221.67831420898438, "logps/rejected": -221.48641967773438, "loss": 0.6911, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.023309772834181786, "rewards/margins": 0.06180506944656372, "rewards/rejected": -0.03849529102444649, "step": 12210 }, { "epoch": 0.8, "learning_rate": 5.875831008943817e-07, "logits/chosen": -2.158846616744995, "logits/rejected": -2.1420199871063232, "logps/chosen": -177.19949340820312, "logps/rejected": -160.8948516845703, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": 0.0044457814656198025, "rewards/margins": 0.06295563280582428, "rewards/rejected": -0.058509863913059235, "step": 12220 }, { "epoch": 0.8, "learning_rate": 5.839105115752442e-07, "logits/chosen": -2.284562110900879, "logits/rejected": -2.0982518196105957, "logps/chosen": -224.8004150390625, "logps/rejected": -193.5330352783203, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.01822907105088234, "rewards/margins": 0.08740357309579849, "rewards/rejected": -0.10563264042139053, "step": 12230 }, { "epoch": 0.8, "learning_rate": 5.802479175174855e-07, "logits/chosen": -2.2706997394561768, "logits/rejected": -2.1211254596710205, "logps/chosen": -163.5981903076172, "logps/rejected": -178.07614135742188, "loss": 0.6896, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.023960810154676437, "rewards/margins": 0.08550871908664703, "rewards/rejected": -0.061547912657260895, "step": 12240 }, { "epoch": 0.8, "learning_rate": 5.765953378269901e-07, "logits/chosen": -2.189150333404541, "logits/rejected": -2.127336025238037, "logps/chosen": -206.731689453125, "logps/rejected": -248.9289093017578, "loss": 0.6869, "rewards/accuracies": 0.625, "rewards/chosen": 0.0026171256322413683, "rewards/margins": 0.11153455078601837, "rewards/rejected": -0.10891741514205933, "step": 12250 }, { "epoch": 0.8, "learning_rate": 5.729527915574037e-07, "logits/chosen": -2.343411922454834, "logits/rejected": -2.2044577598571777, "logps/chosen": -220.0668487548828, "logps/rejected": -229.0086669921875, "loss": 0.6906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0035170826595276594, "rewards/margins": 0.08602721989154816, "rewards/rejected": -0.08251012861728668, "step": 12260 }, { "epoch": 0.8, "learning_rate": 5.693202977100304e-07, "logits/chosen": -2.354865074157715, "logits/rejected": -2.067964553833008, "logps/chosen": -168.99929809570312, "logps/rejected": -172.4113311767578, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": 0.011458302848041058, "rewards/margins": 0.06748346984386444, "rewards/rejected": -0.05602516978979111, "step": 12270 }, { "epoch": 0.8, "learning_rate": 5.656978752337389e-07, "logits/chosen": -2.3720412254333496, "logits/rejected": -2.168274402618408, "logps/chosen": -200.18475341796875, "logps/rejected": -212.4142303466797, "loss": 0.6878, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.00529628898948431, "rewards/margins": 0.10829710960388184, "rewards/rejected": -0.11359341442584991, "step": 12280 }, { "epoch": 0.8, "learning_rate": 5.620855430248581e-07, "logits/chosen": -2.269085645675659, "logits/rejected": -2.128979206085205, "logps/chosen": -160.27426147460938, "logps/rejected": -168.23971557617188, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.017207933589816093, "rewards/margins": 0.10143836587667465, "rewards/rejected": -0.0842304453253746, "step": 12290 }, { "epoch": 0.8, "learning_rate": 5.584833199270837e-07, "logits/chosen": -2.33870267868042, "logits/rejected": -2.1915290355682373, "logps/chosen": -228.42819213867188, "logps/rejected": -227.838623046875, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": 0.0020407275296747684, "rewards/margins": 0.07103622704744339, "rewards/rejected": -0.06899549812078476, "step": 12300 }, { "epoch": 0.8, "eval_logits/chosen": -2.3225796222686768, "eval_logits/rejected": -2.1341781616210938, "eval_logps/chosen": -231.32383728027344, "eval_logps/rejected": -219.537353515625, "eval_loss": 0.6897599101066589, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": 0.006811096332967281, "eval_rewards/margins": 0.08606572449207306, "eval_rewards/rejected": -0.0792546421289444, "eval_runtime": 712.8422, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 12300 }, { "epoch": 0.81, "learning_rate": 5.548912247313742e-07, "logits/chosen": -2.5504541397094727, "logits/rejected": -2.120837450027466, "logps/chosen": -284.99554443359375, "logps/rejected": -243.7223663330078, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.011955673806369305, "rewards/margins": 0.06302481144666672, "rewards/rejected": -0.0749804899096489, "step": 12310 }, { "epoch": 0.81, "learning_rate": 5.513092761758596e-07, "logits/chosen": -2.367363452911377, "logits/rejected": -2.1712703704833984, "logps/chosen": -271.68951416015625, "logps/rejected": -208.8588104248047, "loss": 0.6922, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0047961072996258736, "rewards/margins": 0.0415019765496254, "rewards/rejected": -0.0462980791926384, "step": 12320 }, { "epoch": 0.81, "learning_rate": 5.477374929457363e-07, "logits/chosen": -2.2922816276550293, "logits/rejected": -2.291826009750366, "logps/chosen": -201.52459716796875, "logps/rejected": -188.91586303710938, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005365630611777306, "rewards/margins": 0.06563162058591843, "rewards/rejected": -0.06026599556207657, "step": 12330 }, { "epoch": 0.81, "learning_rate": 5.441758936731772e-07, "logits/chosen": -2.3044772148132324, "logits/rejected": -2.1925055980682373, "logps/chosen": -234.58203125, "logps/rejected": -225.813720703125, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.007632553577423096, "rewards/margins": 0.09021967649459839, "rewards/rejected": -0.08258712291717529, "step": 12340 }, { "epoch": 0.81, "learning_rate": 5.406244969372273e-07, "logits/chosen": -2.225092887878418, "logits/rejected": -2.0609354972839355, "logps/chosen": -199.0269012451172, "logps/rejected": -219.9811248779297, "loss": 0.6867, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.006959347520023584, "rewards/margins": 0.14452257752418518, "rewards/rejected": -0.13756322860717773, "step": 12350 }, { "epoch": 0.81, "learning_rate": 5.370833212637122e-07, "logits/chosen": -2.2868409156799316, "logits/rejected": -1.9941694736480713, "logps/chosen": -218.37545776367188, "logps/rejected": -213.766357421875, "loss": 0.6913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0013551099691540003, "rewards/margins": 0.09176047146320343, "rewards/rejected": -0.09040535986423492, "step": 12360 }, { "epoch": 0.81, "learning_rate": 5.335523851251392e-07, "logits/chosen": -2.239475727081299, "logits/rejected": -2.166835069656372, "logps/chosen": -206.76382446289062, "logps/rejected": -195.5325469970703, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0062995306216180325, "rewards/margins": 0.09566928446292877, "rewards/rejected": -0.10196882486343384, "step": 12370 }, { "epoch": 0.81, "learning_rate": 5.300317069406003e-07, "logits/chosen": -2.219008445739746, "logits/rejected": -2.191943407058716, "logps/chosen": -158.67398071289062, "logps/rejected": -173.11231994628906, "loss": 0.687, "rewards/accuracies": 0.625, "rewards/chosen": 0.020719021558761597, "rewards/margins": 0.10671563446521759, "rewards/rejected": -0.085996612906456, "step": 12380 }, { "epoch": 0.81, "learning_rate": 5.265213050756782e-07, "logits/chosen": -2.44820499420166, "logits/rejected": -2.2743122577667236, "logps/chosen": -229.58837890625, "logps/rejected": -236.1202850341797, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.024628793820738792, "rewards/margins": 0.08663028478622437, "rewards/rejected": -0.06200150400400162, "step": 12390 }, { "epoch": 0.81, "learning_rate": 5.230211978423477e-07, "logits/chosen": -2.3545405864715576, "logits/rejected": -2.235017776489258, "logps/chosen": -220.39273071289062, "logps/rejected": -212.700439453125, "loss": 0.6927, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.01592477597296238, "rewards/margins": 0.07476507127285004, "rewards/rejected": -0.09068983793258667, "step": 12400 }, { "epoch": 0.81, "eval_logits/chosen": -2.3246352672576904, "eval_logits/rejected": -2.1361496448516846, "eval_logps/chosen": -230.83546447753906, "eval_logps/rejected": -218.64422607421875, "eval_loss": 0.6897544264793396, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": 0.0116947703063488, "eval_rewards/margins": 0.08201787620782852, "eval_rewards/rejected": -0.07032310217618942, "eval_runtime": 715.5021, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.398, "step": 12400 }, { "epoch": 0.81, "learning_rate": 5.195314034988835e-07, "logits/chosen": -2.4760658740997314, "logits/rejected": -2.2121901512145996, "logps/chosen": -217.1764678955078, "logps/rejected": -159.36187744140625, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.021106228232383728, "rewards/margins": 0.09224637597799301, "rewards/rejected": -0.07114015519618988, "step": 12410 }, { "epoch": 0.81, "learning_rate": 5.160519402497616e-07, "logits/chosen": -2.3596584796905518, "logits/rejected": -2.209519147872925, "logps/chosen": -230.1166229248047, "logps/rejected": -234.6483612060547, "loss": 0.6881, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0006260558729991317, "rewards/margins": 0.0859164297580719, "rewards/rejected": -0.08654247969388962, "step": 12420 }, { "epoch": 0.81, "learning_rate": 5.125828262455679e-07, "logits/chosen": -2.256119728088379, "logits/rejected": -2.056142807006836, "logps/chosen": -254.59011840820312, "logps/rejected": -233.541748046875, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.011229803785681725, "rewards/margins": 0.09291192889213562, "rewards/rejected": -0.08168213069438934, "step": 12430 }, { "epoch": 0.81, "learning_rate": 5.091240795828992e-07, "logits/chosen": -2.019127368927002, "logits/rejected": -2.2054896354675293, "logps/chosen": -200.9918212890625, "logps/rejected": -231.53662109375, "loss": 0.6911, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.014986000955104828, "rewards/margins": 0.10714125633239746, "rewards/rejected": -0.09215524047613144, "step": 12440 }, { "epoch": 0.81, "learning_rate": 5.056757183042732e-07, "logits/chosen": -2.24973464012146, "logits/rejected": -2.1625046730041504, "logps/chosen": -232.5579071044922, "logps/rejected": -222.85122680664062, "loss": 0.6896, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.009982489980757236, "rewards/margins": 0.10170789062976837, "rewards/rejected": -0.09172537922859192, "step": 12450 }, { "epoch": 0.82, "learning_rate": 5.022377603980308e-07, "logits/chosen": -2.4154021739959717, "logits/rejected": -2.081878185272217, "logps/chosen": -249.6466522216797, "logps/rejected": -192.86483764648438, "loss": 0.6883, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0036199470050632954, "rewards/margins": 0.09689504653215408, "rewards/rejected": -0.10051499307155609, "step": 12460 }, { "epoch": 0.82, "learning_rate": 4.988102237982454e-07, "logits/chosen": -2.36234974861145, "logits/rejected": -2.286999225616455, "logps/chosen": -223.90420532226562, "logps/rejected": -188.7877655029297, "loss": 0.6927, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.017237504944205284, "rewards/margins": 0.05458803102374077, "rewards/rejected": -0.0718255490064621, "step": 12470 }, { "epoch": 0.82, "learning_rate": 4.953931263846251e-07, "logits/chosen": -2.3473331928253174, "logits/rejected": -2.0705606937408447, "logps/chosen": -261.57763671875, "logps/rejected": -227.3242645263672, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": 0.0013126353733241558, "rewards/margins": 0.0970732644200325, "rewards/rejected": -0.09576062858104706, "step": 12480 }, { "epoch": 0.82, "learning_rate": 4.919864859824266e-07, "logits/chosen": -2.291419267654419, "logits/rejected": -2.145946979522705, "logps/chosen": -238.6917724609375, "logps/rejected": -202.22686767578125, "loss": 0.691, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.01257591973990202, "rewards/margins": 0.08570267260074615, "rewards/rejected": -0.09827860444784164, "step": 12490 }, { "epoch": 0.82, "learning_rate": 4.885903203623532e-07, "logits/chosen": -2.481529712677002, "logits/rejected": -2.0849225521087646, "logps/chosen": -283.2362365722656, "logps/rejected": -227.9224090576172, "loss": 0.6897, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.017776403576135635, "rewards/margins": 0.0845954492688179, "rewards/rejected": -0.06681904196739197, "step": 12500 }, { "epoch": 0.82, "eval_logits/chosen": -2.325686454772949, "eval_logits/rejected": -2.137054681777954, "eval_logps/chosen": -231.05908203125, "eval_logps/rejected": -218.7409210205078, "eval_loss": 0.6897637844085693, "eval_rewards/accuracies": 0.6324999928474426, "eval_rewards/chosen": 0.009458942338824272, "eval_rewards/margins": 0.08074919879436493, "eval_rewards/rejected": -0.07129025459289551, "eval_runtime": 712.8253, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 12500 }, { "epoch": 0.82, "learning_rate": 4.852046472404695e-07, "logits/chosen": -2.4758987426757812, "logits/rejected": -1.738454818725586, "logps/chosen": -294.7528381347656, "logps/rejected": -177.1123504638672, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.031358979642391205, "rewards/margins": 0.08032914996147156, "rewards/rejected": -0.04897017404437065, "step": 12510 }, { "epoch": 0.82, "learning_rate": 4.818294842781035e-07, "logits/chosen": -2.3906936645507812, "logits/rejected": -2.208167552947998, "logps/chosen": -224.6951141357422, "logps/rejected": -184.04940795898438, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0151332076638937, "rewards/margins": 0.11712169647216797, "rewards/rejected": -0.10198847949504852, "step": 12520 }, { "epoch": 0.82, "learning_rate": 4.784648490817601e-07, "logits/chosen": -2.388882875442505, "logits/rejected": -2.094998836517334, "logps/chosen": -218.9868621826172, "logps/rejected": -182.89913940429688, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": 0.011199096217751503, "rewards/margins": 0.07170800119638443, "rewards/rejected": -0.060508906841278076, "step": 12530 }, { "epoch": 0.82, "learning_rate": 4.751107592030235e-07, "logits/chosen": -2.399803400039673, "logits/rejected": -2.1163930892944336, "logps/chosen": -167.94920349121094, "logps/rejected": -162.90350341796875, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": 0.01902575045824051, "rewards/margins": 0.12054232507944107, "rewards/rejected": -0.10151656717061996, "step": 12540 }, { "epoch": 0.82, "learning_rate": 4.717672321384703e-07, "logits/chosen": -2.294178009033203, "logits/rejected": -2.0486741065979004, "logps/chosen": -220.0991973876953, "logps/rejected": -187.33387756347656, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03146491199731827, "rewards/margins": 0.094304159283638, "rewards/rejected": -0.06283925473690033, "step": 12550 }, { "epoch": 0.82, "learning_rate": 4.684342853295748e-07, "logits/chosen": -2.250814199447632, "logits/rejected": -2.1270248889923096, "logps/chosen": -185.5677032470703, "logps/rejected": -191.38328552246094, "loss": 0.6884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02129070833325386, "rewards/margins": 0.09739609807729721, "rewards/rejected": -0.07610537856817245, "step": 12560 }, { "epoch": 0.82, "learning_rate": 4.651119361626213e-07, "logits/chosen": -2.5402443408966064, "logits/rejected": -2.2059483528137207, "logps/chosen": -238.58627319335938, "logps/rejected": -199.43728637695312, "loss": 0.6903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.025323236361145973, "rewards/margins": 0.07079877704381943, "rewards/rejected": -0.045475538820028305, "step": 12570 }, { "epoch": 0.82, "learning_rate": 4.618002019686091e-07, "logits/chosen": -2.3017804622650146, "logits/rejected": -2.1221537590026855, "logps/chosen": -278.8421630859375, "logps/rejected": -230.077880859375, "loss": 0.6894, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0037017776630818844, "rewards/margins": 0.07871778309345245, "rewards/rejected": -0.07501600682735443, "step": 12580 }, { "epoch": 0.82, "learning_rate": 4.5849910002316757e-07, "logits/chosen": -2.3771467208862305, "logits/rejected": -2.034799575805664, "logps/chosen": -182.04556274414062, "logps/rejected": -158.250732421875, "loss": 0.6908, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.015334153547883034, "rewards/margins": 0.09469417482614517, "rewards/rejected": -0.11002832651138306, "step": 12590 }, { "epoch": 0.82, "learning_rate": 4.5520864754645984e-07, "logits/chosen": -2.421297550201416, "logits/rejected": -2.2816054821014404, "logps/chosen": -279.0403747558594, "logps/rejected": -242.0817108154297, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.023093996569514275, "rewards/margins": 0.06866296380758286, "rewards/rejected": -0.04556896537542343, "step": 12600 }, { "epoch": 0.82, "eval_logits/chosen": -2.3262624740600586, "eval_logits/rejected": -2.1376304626464844, "eval_logps/chosen": -231.39772033691406, "eval_logps/rejected": -219.05184936523438, "eval_loss": 0.6897605061531067, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": 0.006072386633604765, "eval_rewards/margins": 0.08047185838222504, "eval_rewards/rejected": -0.07439946383237839, "eval_runtime": 714.0825, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 12600 }, { "epoch": 0.83, "learning_rate": 4.5192886170309896e-07, "logits/chosen": -2.250743865966797, "logits/rejected": -2.1508920192718506, "logps/chosen": -190.7171630859375, "logps/rejected": -195.17050170898438, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0005129704950377345, "rewards/margins": 0.042692478746175766, "rewards/rejected": -0.04217951372265816, "step": 12610 }, { "epoch": 0.83, "learning_rate": 4.486597596020548e-07, "logits/chosen": -2.3476834297180176, "logits/rejected": -2.0744900703430176, "logps/chosen": -219.5320281982422, "logps/rejected": -190.0487518310547, "loss": 0.6892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0004929341375827789, "rewards/margins": 0.08838485181331635, "rewards/rejected": -0.08789192140102386, "step": 12620 }, { "epoch": 0.83, "learning_rate": 4.454013582965644e-07, "logits/chosen": -2.2743661403656006, "logits/rejected": -1.8610731363296509, "logps/chosen": -263.5701904296875, "logps/rejected": -219.12197875976562, "loss": 0.6914, "rewards/accuracies": 0.625, "rewards/chosen": -0.001134876860305667, "rewards/margins": 0.059951018542051315, "rewards/rejected": -0.06108590215444565, "step": 12630 }, { "epoch": 0.83, "learning_rate": 4.4215367478404605e-07, "logits/chosen": -2.125401020050049, "logits/rejected": -2.1011133193969727, "logps/chosen": -272.4917297363281, "logps/rejected": -303.33233642578125, "loss": 0.691, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.01731831021606922, "rewards/margins": 0.06468813121318817, "rewards/rejected": -0.08200643211603165, "step": 12640 }, { "epoch": 0.83, "learning_rate": 4.389167260060068e-07, "logits/chosen": -2.4071826934814453, "logits/rejected": -2.16564679145813, "logps/chosen": -205.21432495117188, "logps/rejected": -178.94972229003906, "loss": 0.6879, "rewards/accuracies": 0.625, "rewards/chosen": 0.03475916385650635, "rewards/margins": 0.11909898370504379, "rewards/rejected": -0.08433983474969864, "step": 12650 }, { "epoch": 0.83, "learning_rate": 4.356905288479579e-07, "logits/chosen": -2.2683780193328857, "logits/rejected": -2.039124011993408, "logps/chosen": -227.39346313476562, "logps/rejected": -212.4646453857422, "loss": 0.6859, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.008726147934794426, "rewards/margins": 0.13835129141807556, "rewards/rejected": -0.14707742631435394, "step": 12660 }, { "epoch": 0.83, "learning_rate": 4.3247510013932377e-07, "logits/chosen": -2.217339515686035, "logits/rejected": -2.0521254539489746, "logps/chosen": -252.25942993164062, "logps/rejected": -260.9900817871094, "loss": 0.6901, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0007096766494214535, "rewards/margins": 0.08910763263702393, "rewards/rejected": -0.08981730788946152, "step": 12670 }, { "epoch": 0.83, "learning_rate": 4.2927045665335594e-07, "logits/chosen": -1.9187930822372437, "logits/rejected": -1.8410171270370483, "logps/chosen": -167.76393127441406, "logps/rejected": -172.5765838623047, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0288618765771389, "rewards/margins": 0.08367923647165298, "rewards/rejected": -0.11254110187292099, "step": 12680 }, { "epoch": 0.83, "learning_rate": 4.260766151070439e-07, "logits/chosen": -2.1377556324005127, "logits/rejected": -2.1839041709899902, "logps/chosen": -224.4342803955078, "logps/rejected": -222.44125366210938, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -7.813423508196138e-06, "rewards/margins": 0.09267593920230865, "rewards/rejected": -0.09268374741077423, "step": 12690 }, { "epoch": 0.83, "learning_rate": 4.228935921610308e-07, "logits/chosen": -2.3698036670684814, "logits/rejected": -2.0076329708099365, "logps/chosen": -262.02777099609375, "logps/rejected": -204.87368774414062, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": 0.016908859834074974, "rewards/margins": 0.06897404789924622, "rewards/rejected": -0.052065182477235794, "step": 12700 }, { "epoch": 0.83, "eval_logits/chosen": -2.3263168334960938, "eval_logits/rejected": -2.1376240253448486, "eval_logps/chosen": -231.38568115234375, "eval_logps/rejected": -219.1470947265625, "eval_loss": 0.689755916595459, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": 0.0061928038485348225, "eval_rewards/margins": 0.08154484629631042, "eval_rewards/rejected": -0.07535204291343689, "eval_runtime": 712.3303, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 12700 }, { "epoch": 0.83, "learning_rate": 4.1972140441952246e-07, "logits/chosen": -2.178255081176758, "logits/rejected": -2.146824359893799, "logps/chosen": -236.79483032226562, "logps/rejected": -246.6728973388672, "loss": 0.6903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.027884885668754578, "rewards/margins": 0.07868941873311996, "rewards/rejected": -0.05080454424023628, "step": 12710 }, { "epoch": 0.83, "learning_rate": 4.165600684302046e-07, "logits/chosen": -2.2848753929138184, "logits/rejected": -2.33852481842041, "logps/chosen": -173.56161499023438, "logps/rejected": -192.32496643066406, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": 0.022223882377147675, "rewards/margins": 0.08805432170629501, "rewards/rejected": -0.06583045423030853, "step": 12720 }, { "epoch": 0.83, "learning_rate": 4.13409600684154e-07, "logits/chosen": -2.392894744873047, "logits/rejected": -2.1074211597442627, "logps/chosen": -215.2464599609375, "logps/rejected": -195.3648681640625, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0022683297283947468, "rewards/margins": 0.09301736205816269, "rewards/rejected": -0.09528569132089615, "step": 12730 }, { "epoch": 0.83, "learning_rate": 4.102700176157548e-07, "logits/chosen": -2.457080364227295, "logits/rejected": -2.087562084197998, "logps/chosen": -324.733642578125, "logps/rejected": -234.39254760742188, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.00021561775065492839, "rewards/margins": 0.07445430010557175, "rewards/rejected": -0.07423867285251617, "step": 12740 }, { "epoch": 0.83, "learning_rate": 4.0714133560260884e-07, "logits/chosen": -2.3434674739837646, "logits/rejected": -2.1800377368927, "logps/chosen": -259.04107666015625, "logps/rejected": -207.67910766601562, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": 0.004920002073049545, "rewards/margins": 0.06868889182806015, "rewards/rejected": -0.0637688934803009, "step": 12750 }, { "epoch": 0.83, "learning_rate": 4.0402357096545527e-07, "logits/chosen": -2.2215254306793213, "logits/rejected": -2.1798095703125, "logps/chosen": -250.39193725585938, "logps/rejected": -248.3852081298828, "loss": 0.6899, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.013696703128516674, "rewards/margins": 0.0816536545753479, "rewards/rejected": -0.06795695424079895, "step": 12760 }, { "epoch": 0.84, "learning_rate": 4.0091673996808025e-07, "logits/chosen": -2.4354679584503174, "logits/rejected": -2.2336437702178955, "logps/chosen": -198.1351776123047, "logps/rejected": -181.49725341796875, "loss": 0.689, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.010751022025942802, "rewards/margins": 0.07989239692687988, "rewards/rejected": -0.09064342081546783, "step": 12770 }, { "epoch": 0.84, "learning_rate": 3.9782085881723776e-07, "logits/chosen": -2.2519314289093018, "logits/rejected": -2.1054606437683105, "logps/chosen": -164.67416381835938, "logps/rejected": -188.26077270507812, "loss": 0.6886, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.009234304539859295, "rewards/margins": 0.11619944870471954, "rewards/rejected": -0.10696514695882797, "step": 12780 }, { "epoch": 0.84, "learning_rate": 3.947359436625592e-07, "logits/chosen": -2.2633697986602783, "logits/rejected": -2.157527446746826, "logps/chosen": -232.0320281982422, "logps/rejected": -205.00436401367188, "loss": 0.6891, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.02630910649895668, "rewards/margins": 0.1053650975227356, "rewards/rejected": -0.07905599474906921, "step": 12790 }, { "epoch": 0.84, "learning_rate": 3.9166201059647386e-07, "logits/chosen": -2.399799346923828, "logits/rejected": -2.2643110752105713, "logps/chosen": -259.34234619140625, "logps/rejected": -227.6080780029297, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.02507871761918068, "rewards/margins": 0.051967114210128784, "rewards/rejected": -0.026888396590948105, "step": 12800 }, { "epoch": 0.84, "eval_logits/chosen": -2.3278772830963135, "eval_logits/rejected": -2.138990640640259, "eval_logps/chosen": -230.71701049804688, "eval_logps/rejected": -218.49427795410156, "eval_loss": 0.6897500157356262, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": 0.012879305519163609, "eval_rewards/margins": 0.08170315623283386, "eval_rewards/rejected": -0.06882384419441223, "eval_runtime": 711.356, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 12800 }, { "epoch": 0.84, "learning_rate": 3.8859907565412194e-07, "logits/chosen": -2.208301544189453, "logits/rejected": -2.289896249771118, "logps/chosen": -179.2988739013672, "logps/rejected": -186.98802185058594, "loss": 0.6891, "rewards/accuracies": 0.625, "rewards/chosen": 0.0035035633482038975, "rewards/margins": 0.08267536014318466, "rewards/rejected": -0.0791717916727066, "step": 12810 }, { "epoch": 0.84, "learning_rate": 3.8554715481327303e-07, "logits/chosen": -2.365440845489502, "logits/rejected": -1.9772167205810547, "logps/chosen": -233.1007843017578, "logps/rejected": -207.98318481445312, "loss": 0.6874, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0015824921429157257, "rewards/margins": 0.10280168056488037, "rewards/rejected": -0.10121919214725494, "step": 12820 }, { "epoch": 0.84, "learning_rate": 3.8250626399424007e-07, "logits/chosen": -2.3790652751922607, "logits/rejected": -2.1290512084960938, "logps/chosen": -249.7417449951172, "logps/rejected": -239.1602020263672, "loss": 0.6902, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.017364097759127617, "rewards/margins": 0.08268658816814423, "rewards/rejected": -0.06532249599695206, "step": 12830 }, { "epoch": 0.84, "learning_rate": 3.7947641905980104e-07, "logits/chosen": -2.203456163406372, "logits/rejected": -2.2128920555114746, "logps/chosen": -205.8812713623047, "logps/rejected": -179.02224731445312, "loss": 0.6892, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.014371681027114391, "rewards/margins": 0.07821665704250336, "rewards/rejected": -0.0638449639081955, "step": 12840 }, { "epoch": 0.84, "learning_rate": 3.764576358151098e-07, "logits/chosen": -2.238459825515747, "logits/rejected": -2.233910322189331, "logps/chosen": -173.391845703125, "logps/rejected": -167.51803588867188, "loss": 0.6909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01425178349018097, "rewards/margins": 0.059704847633838654, "rewards/rejected": -0.045453060418367386, "step": 12850 }, { "epoch": 0.84, "learning_rate": 3.7344993000761944e-07, "logits/chosen": -2.349444627761841, "logits/rejected": -2.242591381072998, "logps/chosen": -178.75843811035156, "logps/rejected": -230.89614868164062, "loss": 0.6892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.014432880096137524, "rewards/margins": 0.0916595309972763, "rewards/rejected": -0.10609239339828491, "step": 12860 }, { "epoch": 0.84, "learning_rate": 3.7045331732699585e-07, "logits/chosen": -2.3606972694396973, "logits/rejected": -2.171160936355591, "logps/chosen": -203.33251953125, "logps/rejected": -180.00003051757812, "loss": 0.6859, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01865537092089653, "rewards/margins": 0.12366944551467896, "rewards/rejected": -0.10501406341791153, "step": 12870 }, { "epoch": 0.84, "learning_rate": 3.6746781340503993e-07, "logits/chosen": -2.2035956382751465, "logits/rejected": -2.104219436645508, "logps/chosen": -231.5043487548828, "logps/rejected": -232.2509307861328, "loss": 0.6872, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.029114311560988426, "rewards/margins": 0.0877356305718422, "rewards/rejected": -0.058621324598789215, "step": 12880 }, { "epoch": 0.84, "learning_rate": 3.6449343381560116e-07, "logits/chosen": -2.2976372241973877, "logits/rejected": -2.071730613708496, "logps/chosen": -232.3043212890625, "logps/rejected": -229.9260711669922, "loss": 0.6893, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.008786660619080067, "rewards/margins": 0.11011286079883575, "rewards/rejected": -0.11889950931072235, "step": 12890 }, { "epoch": 0.84, "learning_rate": 3.615301940745017e-07, "logits/chosen": -2.5623362064361572, "logits/rejected": -1.9647992849349976, "logps/chosen": -314.986083984375, "logps/rejected": -217.2484130859375, "loss": 0.6911, "rewards/accuracies": 0.75, "rewards/chosen": 0.022090371698141098, "rewards/margins": 0.07439263164997101, "rewards/rejected": -0.05230225995182991, "step": 12900 }, { "epoch": 0.84, "eval_logits/chosen": -2.3258814811706543, "eval_logits/rejected": -2.1371657848358154, "eval_logps/chosen": -230.18865966796875, "eval_logps/rejected": -218.1457061767578, "eval_loss": 0.6897482872009277, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": 0.0181629229336977, "eval_rewards/margins": 0.0835009291768074, "eval_rewards/rejected": -0.06533800810575485, "eval_runtime": 710.8607, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 12900 }, { "epoch": 0.84, "learning_rate": 3.5857810963945084e-07, "logits/chosen": -2.1857872009277344, "logits/rejected": -1.9668527841567993, "logps/chosen": -214.0235595703125, "logps/rejected": -207.2059783935547, "loss": 0.6891, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.004855555482208729, "rewards/margins": 0.08005331456661224, "rewards/rejected": -0.07519775629043579, "step": 12910 }, { "epoch": 0.85, "learning_rate": 3.556371959099678e-07, "logits/chosen": -2.3944733142852783, "logits/rejected": -2.1525301933288574, "logps/chosen": -294.125732421875, "logps/rejected": -261.60772705078125, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.030485982075333595, "rewards/margins": 0.07452499121427536, "rewards/rejected": -0.04403900355100632, "step": 12920 }, { "epoch": 0.85, "learning_rate": 3.5270746822729797e-07, "logits/chosen": -2.280972957611084, "logits/rejected": -2.2035224437713623, "logps/chosen": -246.06930541992188, "logps/rejected": -267.5752258300781, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.014127634465694427, "rewards/margins": 0.08930746465921402, "rewards/rejected": -0.07517983019351959, "step": 12930 }, { "epoch": 0.85, "learning_rate": 3.4978894187433746e-07, "logits/chosen": -2.3760852813720703, "logits/rejected": -2.23614501953125, "logps/chosen": -152.7420196533203, "logps/rejected": -146.85598754882812, "loss": 0.6902, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.016667893156409264, "rewards/margins": 0.05022420361638069, "rewards/rejected": -0.0668920949101448, "step": 12940 }, { "epoch": 0.85, "learning_rate": 3.468816320755486e-07, "logits/chosen": -2.163353443145752, "logits/rejected": -1.9819806814193726, "logps/chosen": -220.0610809326172, "logps/rejected": -186.81573486328125, "loss": 0.6903, "rewards/accuracies": 0.625, "rewards/chosen": 0.03866659849882126, "rewards/margins": 0.06364385038614273, "rewards/rejected": -0.02497725561261177, "step": 12950 }, { "epoch": 0.85, "learning_rate": 3.4398555399688336e-07, "logits/chosen": -2.425262212753296, "logits/rejected": -2.0546271800994873, "logps/chosen": -211.6660919189453, "logps/rejected": -196.01766967773438, "loss": 0.6918, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0031560491770505905, "rewards/margins": 0.05015747994184494, "rewards/rejected": -0.05331353470683098, "step": 12960 }, { "epoch": 0.85, "learning_rate": 3.411007227457047e-07, "logits/chosen": -2.327322006225586, "logits/rejected": -2.270181179046631, "logps/chosen": -244.98068237304688, "logps/rejected": -223.50830078125, "loss": 0.6875, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02780618704855442, "rewards/margins": 0.10802390426397324, "rewards/rejected": -0.08021771907806396, "step": 12970 }, { "epoch": 0.85, "learning_rate": 3.382271533707043e-07, "logits/chosen": -2.24385404586792, "logits/rejected": -2.227750301361084, "logps/chosen": -189.8992919921875, "logps/rejected": -173.1783447265625, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01705920323729515, "rewards/margins": 0.0616273507475853, "rewards/rejected": -0.044568147510290146, "step": 12980 }, { "epoch": 0.85, "learning_rate": 3.353648608618287e-07, "logits/chosen": -2.3153414726257324, "logits/rejected": -2.06382417678833, "logps/chosen": -167.10702514648438, "logps/rejected": -164.1302490234375, "loss": 0.6895, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.00764242559671402, "rewards/margins": 0.0699392706155777, "rewards/rejected": -0.06229684501886368, "step": 12990 }, { "epoch": 0.85, "learning_rate": 3.3251386015019676e-07, "logits/chosen": -2.3504998683929443, "logits/rejected": -2.136061191558838, "logps/chosen": -201.58786010742188, "logps/rejected": -176.71900939941406, "loss": 0.6886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.011715460568666458, "rewards/margins": 0.0887567475438118, "rewards/rejected": -0.07704129070043564, "step": 13000 }, { "epoch": 0.85, "eval_logits/chosen": -2.3278446197509766, "eval_logits/rejected": -2.138990640640259, "eval_logps/chosen": -230.5150146484375, "eval_logps/rejected": -218.6830596923828, "eval_loss": 0.6897379159927368, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": 0.014899209141731262, "eval_rewards/margins": 0.0856110006570816, "eval_rewards/rejected": -0.07071178406476974, "eval_runtime": 711.2988, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 13000 }, { "epoch": 0.85, "learning_rate": 3.296741661080255e-07, "logits/chosen": -2.2785840034484863, "logits/rejected": -2.186216115951538, "logps/chosen": -232.2931671142578, "logps/rejected": -235.34976196289062, "loss": 0.688, "rewards/accuracies": 0.75, "rewards/chosen": 0.008452139794826508, "rewards/margins": 0.09597768634557724, "rewards/rejected": -0.08752553910017014, "step": 13010 }, { "epoch": 0.85, "learning_rate": 3.2684579354854974e-07, "logits/chosen": -2.3801522254943848, "logits/rejected": -2.2735419273376465, "logps/chosen": -294.13189697265625, "logps/rejected": -312.67303466796875, "loss": 0.6917, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.016732942312955856, "rewards/margins": 0.08282653987407684, "rewards/rejected": -0.099559485912323, "step": 13020 }, { "epoch": 0.85, "learning_rate": 3.2402875722594653e-07, "logits/chosen": -2.408092737197876, "logits/rejected": -2.130885362625122, "logps/chosen": -165.7288360595703, "logps/rejected": -181.1701202392578, "loss": 0.6893, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.03316589817404747, "rewards/margins": 0.09289722144603729, "rewards/rejected": -0.05973132699728012, "step": 13030 }, { "epoch": 0.85, "learning_rate": 3.212230718352566e-07, "logits/chosen": -2.2618842124938965, "logits/rejected": -2.2778449058532715, "logps/chosen": -224.6171875, "logps/rejected": -162.24813842773438, "loss": 0.6932, "rewards/accuracies": 0.625, "rewards/chosen": 0.004326606169342995, "rewards/margins": 0.013279316015541553, "rewards/rejected": -0.008952709846198559, "step": 13040 }, { "epoch": 0.85, "learning_rate": 3.1842875201231025e-07, "logits/chosen": -2.3244717121124268, "logits/rejected": -2.0360231399536133, "logps/chosen": -220.89639282226562, "logps/rejected": -202.62872314453125, "loss": 0.6897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.008436797186732292, "rewards/margins": 0.07507555186748505, "rewards/rejected": -0.0666387528181076, "step": 13050 }, { "epoch": 0.85, "learning_rate": 3.156458123336478e-07, "logits/chosen": -2.1647417545318604, "logits/rejected": -1.9881470203399658, "logps/chosen": -156.07313537597656, "logps/rejected": -158.6283721923828, "loss": 0.6889, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.024387424811720848, "rewards/margins": 0.12704019248485565, "rewards/rejected": -0.10265277326107025, "step": 13060 }, { "epoch": 0.86, "learning_rate": 3.128742673164459e-07, "logits/chosen": -2.402404308319092, "logits/rejected": -2.034379720687866, "logps/chosen": -280.46630859375, "logps/rejected": -245.130615234375, "loss": 0.6902, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.024609588086605072, "rewards/margins": 0.09180278331041336, "rewards/rejected": -0.06719318777322769, "step": 13070 }, { "epoch": 0.86, "learning_rate": 3.101141314184414e-07, "logits/chosen": -2.511915683746338, "logits/rejected": -2.2664966583251953, "logps/chosen": -203.702880859375, "logps/rejected": -200.3907928466797, "loss": 0.6924, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.016816768795251846, "rewards/margins": 0.058991938829422, "rewards/rejected": -0.04217516630887985, "step": 13080 }, { "epoch": 0.86, "learning_rate": 3.0736541903785526e-07, "logits/chosen": -2.164177417755127, "logits/rejected": -2.129770517349243, "logps/chosen": -207.080322265625, "logps/rejected": -264.8565368652344, "loss": 0.6908, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.019345903769135475, "rewards/margins": 0.08316637575626373, "rewards/rejected": -0.06382046639919281, "step": 13090 }, { "epoch": 0.86, "learning_rate": 3.0462814451331704e-07, "logits/chosen": -2.218174934387207, "logits/rejected": -2.0847418308258057, "logps/chosen": -229.975341796875, "logps/rejected": -235.6430206298828, "loss": 0.6914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.007010665722191334, "rewards/margins": 0.05762631446123123, "rewards/rejected": -0.05061563849449158, "step": 13100 }, { "epoch": 0.86, "eval_logits/chosen": -2.326002836227417, "eval_logits/rejected": -2.1373050212860107, "eval_logps/chosen": -230.6532745361328, "eval_logps/rejected": -218.62350463867188, "eval_loss": 0.6897422075271606, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": 0.013516743667423725, "eval_rewards/margins": 0.0836327001452446, "eval_rewards/rejected": -0.07011596858501434, "eval_runtime": 712.647, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 13100 }, { "epoch": 0.86, "learning_rate": 3.019023221237927e-07, "logits/chosen": -2.2801291942596436, "logits/rejected": -2.1055219173431396, "logps/chosen": -236.72677612304688, "logps/rejected": -187.6962890625, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 5.53287572984118e-05, "rewards/margins": 0.08536889404058456, "rewards/rejected": -0.08531356602907181, "step": 13110 }, { "epoch": 0.86, "learning_rate": 2.991879660885058e-07, "logits/chosen": -2.4416868686676025, "logits/rejected": -2.1827034950256348, "logps/chosen": -261.5027770996094, "logps/rejected": -253.5607147216797, "loss": 0.6911, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.020441105589270592, "rewards/margins": 0.08418162912130356, "rewards/rejected": -0.06374052911996841, "step": 13120 }, { "epoch": 0.86, "learning_rate": 2.9648509056686786e-07, "logits/chosen": -2.3484883308410645, "logits/rejected": -2.1797919273376465, "logps/chosen": -175.03335571289062, "logps/rejected": -158.2578887939453, "loss": 0.6874, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01908428594470024, "rewards/margins": 0.08800263702869415, "rewards/rejected": -0.06891834735870361, "step": 13130 }, { "epoch": 0.86, "learning_rate": 2.937937096584012e-07, "logits/chosen": -2.3359837532043457, "logits/rejected": -2.114428758621216, "logps/chosen": -287.9132385253906, "logps/rejected": -234.6566619873047, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": 0.024876803159713745, "rewards/margins": 0.07227373123168945, "rewards/rejected": -0.04739692062139511, "step": 13140 }, { "epoch": 0.86, "learning_rate": 2.9111383740266756e-07, "logits/chosen": -2.134230136871338, "logits/rejected": -2.000624179840088, "logps/chosen": -234.1415557861328, "logps/rejected": -235.3389434814453, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.016426388174295425, "rewards/margins": 0.06985460221767426, "rewards/rejected": -0.05342821404337883, "step": 13150 }, { "epoch": 0.86, "learning_rate": 2.8844548777919255e-07, "logits/chosen": -2.374894618988037, "logits/rejected": -2.0936150550842285, "logps/chosen": -201.53781127929688, "logps/rejected": -187.21641540527344, "loss": 0.6889, "rewards/accuracies": 0.625, "rewards/chosen": 0.02346022054553032, "rewards/margins": 0.0795883983373642, "rewards/rejected": -0.056128181517124176, "step": 13160 }, { "epoch": 0.86, "learning_rate": 2.8578867470739594e-07, "logits/chosen": -2.2102534770965576, "logits/rejected": -2.069348096847534, "logps/chosen": -184.6317138671875, "logps/rejected": -164.10693359375, "loss": 0.6876, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.001433709287084639, "rewards/margins": 0.1089792400598526, "rewards/rejected": -0.11041294038295746, "step": 13170 }, { "epoch": 0.86, "learning_rate": 2.8314341204651484e-07, "logits/chosen": -2.4396963119506836, "logits/rejected": -2.1671500205993652, "logps/chosen": -273.39495849609375, "logps/rejected": -211.6111602783203, "loss": 0.6875, "rewards/accuracies": 0.625, "rewards/chosen": 0.026441499590873718, "rewards/margins": 0.1099875345826149, "rewards/rejected": -0.08354604244232178, "step": 13180 }, { "epoch": 0.86, "learning_rate": 2.805097135955362e-07, "logits/chosen": -2.3553264141082764, "logits/rejected": -2.145498037338257, "logps/chosen": -210.2996368408203, "logps/rejected": -186.16250610351562, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.022243741899728775, "rewards/margins": 0.10788760334253311, "rewards/rejected": -0.08564385771751404, "step": 13190 }, { "epoch": 0.86, "learning_rate": 2.778875930931213e-07, "logits/chosen": -2.3537840843200684, "logits/rejected": -2.0290145874023438, "logps/chosen": -233.2071075439453, "logps/rejected": -225.6391143798828, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.019117634743452072, "rewards/margins": 0.09794165939092636, "rewards/rejected": -0.07882402837276459, "step": 13200 }, { "epoch": 0.86, "eval_logits/chosen": -2.3253469467163086, "eval_logits/rejected": -2.136681318283081, "eval_logps/chosen": -230.8813018798828, "eval_logps/rejected": -218.95068359375, "eval_loss": 0.6897428035736084, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": 0.011236421763896942, "eval_rewards/margins": 0.08462419360876083, "eval_rewards/rejected": -0.0733877643942833, "eval_runtime": 711.1811, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 13200 }, { "epoch": 0.86, "learning_rate": 2.7527706421753426e-07, "logits/chosen": -2.320481777191162, "logits/rejected": -2.259533643722534, "logps/chosen": -198.22946166992188, "logps/rejected": -209.1739044189453, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0017170917708426714, "rewards/margins": 0.06455695629119873, "rewards/rejected": -0.06283987313508987, "step": 13210 }, { "epoch": 0.86, "learning_rate": 2.726781405865736e-07, "logits/chosen": -2.4160947799682617, "logits/rejected": -1.8638538122177124, "logps/chosen": -302.37939453125, "logps/rejected": -195.12802124023438, "loss": 0.6901, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0028636218048632145, "rewards/margins": 0.08517131209373474, "rewards/rejected": -0.08230768889188766, "step": 13220 }, { "epoch": 0.87, "learning_rate": 2.7009083575749687e-07, "logits/chosen": -2.3083367347717285, "logits/rejected": -2.20975661277771, "logps/chosen": -243.7759246826172, "logps/rejected": -248.42660522460938, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0008438148652203381, "rewards/margins": 0.06286215782165527, "rewards/rejected": -0.062018342316150665, "step": 13230 }, { "epoch": 0.87, "learning_rate": 2.6751516322695457e-07, "logits/chosen": -2.380704402923584, "logits/rejected": -2.3123157024383545, "logps/chosen": -188.8930206298828, "logps/rejected": -188.26637268066406, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.013505371287465096, "rewards/margins": 0.054132528603076935, "rewards/rejected": -0.04062715917825699, "step": 13240 }, { "epoch": 0.87, "learning_rate": 2.649511364309154e-07, "logits/chosen": -2.315520763397217, "logits/rejected": -2.2880232334136963, "logps/chosen": -201.6102294921875, "logps/rejected": -189.54742431640625, "loss": 0.6901, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.006794482469558716, "rewards/margins": 0.08818355947732925, "rewards/rejected": -0.08138908445835114, "step": 13250 }, { "epoch": 0.87, "learning_rate": 2.6239876874460003e-07, "logits/chosen": -2.4259033203125, "logits/rejected": -2.2980501651763916, "logps/chosen": -282.17254638671875, "logps/rejected": -264.3243713378906, "loss": 0.6878, "rewards/accuracies": 0.75, "rewards/chosen": 0.022426238283514977, "rewards/margins": 0.12707999348640442, "rewards/rejected": -0.10465376079082489, "step": 13260 }, { "epoch": 0.87, "learning_rate": 2.5985807348240744e-07, "logits/chosen": -2.4714465141296387, "logits/rejected": -2.0047621726989746, "logps/chosen": -228.8441925048828, "logps/rejected": -192.6537322998047, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.034993596374988556, "rewards/margins": 0.11756626516580582, "rewards/rejected": -0.08257267624139786, "step": 13270 }, { "epoch": 0.87, "learning_rate": 2.5732906389785014e-07, "logits/chosen": -2.3537182807922363, "logits/rejected": -2.1851718425750732, "logps/chosen": -274.45111083984375, "logps/rejected": -248.9810333251953, "loss": 0.6862, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0325496569275856, "rewards/margins": 0.12675470113754272, "rewards/rejected": -0.09420505911111832, "step": 13280 }, { "epoch": 0.87, "learning_rate": 2.5481175318347956e-07, "logits/chosen": -2.2177436351776123, "logits/rejected": -2.26359224319458, "logps/chosen": -225.0997314453125, "logps/rejected": -252.0758819580078, "loss": 0.6895, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.023978976532816887, "rewards/margins": 0.09080135077238083, "rewards/rejected": -0.06682237982749939, "step": 13290 }, { "epoch": 0.87, "learning_rate": 2.5230615447082246e-07, "logits/chosen": -2.3211140632629395, "logits/rejected": -1.975968360900879, "logps/chosen": -250.9895477294922, "logps/rejected": -244.07345581054688, "loss": 0.6891, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.003271388355642557, "rewards/margins": 0.07869114726781845, "rewards/rejected": -0.07541977614164352, "step": 13300 }, { "epoch": 0.87, "eval_logits/chosen": -2.324570894241333, "eval_logits/rejected": -2.1359615325927734, "eval_logps/chosen": -230.75726318359375, "eval_logps/rejected": -218.9420623779297, "eval_loss": 0.6897307634353638, "eval_rewards/accuracies": 0.640500009059906, "eval_rewards/chosen": 0.012476898729801178, "eval_rewards/margins": 0.08577845990657806, "eval_rewards/rejected": -0.07330156117677689, "eval_runtime": 715.3491, "eval_samples_per_second": 2.796, "eval_steps_per_second": 1.398, "step": 13300 }, { "epoch": 0.87, "learning_rate": 2.49812280830308e-07, "logits/chosen": -2.3523142337799072, "logits/rejected": -1.8849273920059204, "logps/chosen": -222.9706268310547, "logps/rejected": -205.4813995361328, "loss": 0.6848, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.02066592127084732, "rewards/margins": 0.17366810142993927, "rewards/rejected": -0.15300217270851135, "step": 13310 }, { "epoch": 0.87, "learning_rate": 2.4733014527120457e-07, "logits/chosen": -2.2234625816345215, "logits/rejected": -2.0490236282348633, "logps/chosen": -206.6344451904297, "logps/rejected": -194.33306884765625, "loss": 0.6906, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04219576343894005, "rewards/margins": 0.10591878741979599, "rewards/rejected": -0.14811456203460693, "step": 13320 }, { "epoch": 0.87, "learning_rate": 2.4485976074154565e-07, "logits/chosen": -2.285674571990967, "logits/rejected": -2.3419785499572754, "logps/chosen": -208.6743927001953, "logps/rejected": -240.1510009765625, "loss": 0.6917, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.003236269112676382, "rewards/margins": 0.01433448027819395, "rewards/rejected": -0.011098211631178856, "step": 13330 }, { "epoch": 0.87, "learning_rate": 2.4240114012806763e-07, "logits/chosen": -2.310711622238159, "logits/rejected": -2.287083148956299, "logps/chosen": -205.61257934570312, "logps/rejected": -194.80186462402344, "loss": 0.6912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02185620740056038, "rewards/margins": 0.06259147822856903, "rewards/rejected": -0.04073526710271835, "step": 13340 }, { "epoch": 0.87, "learning_rate": 2.399542962561399e-07, "logits/chosen": -2.22048282623291, "logits/rejected": -2.0561389923095703, "logps/chosen": -222.4910125732422, "logps/rejected": -185.62081909179688, "loss": 0.6854, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.029649171978235245, "rewards/margins": 0.10539106279611588, "rewards/rejected": -0.07574189454317093, "step": 13350 }, { "epoch": 0.87, "learning_rate": 2.3751924188969876e-07, "logits/chosen": -2.2612175941467285, "logits/rejected": -2.1327712535858154, "logps/chosen": -249.13388061523438, "logps/rejected": -239.14697265625, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.026046359911561012, "rewards/margins": 0.09847725927829742, "rewards/rejected": -0.07243090122938156, "step": 13360 }, { "epoch": 0.87, "learning_rate": 2.3509598973118024e-07, "logits/chosen": -2.448444366455078, "logits/rejected": -2.249556064605713, "logps/chosen": -219.6066436767578, "logps/rejected": -162.51773071289062, "loss": 0.6918, "rewards/accuracies": 0.625, "rewards/chosen": 0.013987274840474129, "rewards/margins": 0.05698896199464798, "rewards/rejected": -0.043001689016819, "step": 13370 }, { "epoch": 0.88, "learning_rate": 2.326845524214555e-07, "logits/chosen": -2.1156511306762695, "logits/rejected": -2.193941354751587, "logps/chosen": -234.92654418945312, "logps/rejected": -209.80361938476562, "loss": 0.6924, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.001277850242331624, "rewards/margins": 0.010018276050686836, "rewards/rejected": -0.008740425109863281, "step": 13380 }, { "epoch": 0.88, "learning_rate": 2.3028494253976158e-07, "logits/chosen": -2.3883180618286133, "logits/rejected": -2.1776084899902344, "logps/chosen": -340.9706115722656, "logps/rejected": -282.01092529296875, "loss": 0.6907, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0029482836835086346, "rewards/margins": 0.056216467171907425, "rewards/rejected": -0.0532681830227375, "step": 13390 }, { "epoch": 0.88, "learning_rate": 2.2789717260364026e-07, "logits/chosen": -2.376600980758667, "logits/rejected": -2.19740891456604, "logps/chosen": -164.39688110351562, "logps/rejected": -148.5932159423828, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.007092096842825413, "rewards/margins": 0.059240736067295074, "rewards/rejected": -0.05214863270521164, "step": 13400 }, { "epoch": 0.88, "eval_logits/chosen": -2.3266842365264893, "eval_logits/rejected": -2.1379003524780273, "eval_logps/chosen": -230.48577880859375, "eval_logps/rejected": -218.5886993408203, "eval_loss": 0.6897343993186951, "eval_rewards/accuracies": 0.6305000185966492, "eval_rewards/chosen": 0.015191725455224514, "eval_rewards/margins": 0.08495970070362091, "eval_rewards/rejected": -0.06976797431707382, "eval_runtime": 711.3263, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 13400 }, { "epoch": 0.88, "learning_rate": 2.255212550688682e-07, "logits/chosen": -2.2746529579162598, "logits/rejected": -2.3803787231445312, "logps/chosen": -216.0994110107422, "logps/rejected": -289.5954895019531, "loss": 0.6881, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.018555883318185806, "rewards/margins": 0.09309352934360504, "rewards/rejected": -0.07453764975070953, "step": 13410 }, { "epoch": 0.88, "learning_rate": 2.2315720232939598e-07, "logits/chosen": -2.6417040824890137, "logits/rejected": -2.17865252494812, "logps/chosen": -258.8695373535156, "logps/rejected": -180.49835205078125, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03244171291589737, "rewards/margins": 0.1123114600777626, "rewards/rejected": -0.07986976206302643, "step": 13420 }, { "epoch": 0.88, "learning_rate": 2.2080502671727956e-07, "logits/chosen": -2.428260087966919, "logits/rejected": -2.08381724357605, "logps/chosen": -218.2057342529297, "logps/rejected": -204.05075073242188, "loss": 0.6889, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.018760915845632553, "rewards/margins": 0.07672649621963501, "rewards/rejected": -0.05796556919813156, "step": 13430 }, { "epoch": 0.88, "learning_rate": 2.1846474050262078e-07, "logits/chosen": -2.3987889289855957, "logits/rejected": -2.263538360595703, "logps/chosen": -243.8972930908203, "logps/rejected": -184.60328674316406, "loss": 0.6902, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.026634354144334793, "rewards/margins": 0.06503110378980637, "rewards/rejected": -0.03839675337076187, "step": 13440 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.9668877124786377, "logits/rejected": -2.058589458465576, "logps/chosen": -194.289306640625, "logps/rejected": -232.1760711669922, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.028739606961607933, "rewards/margins": 0.10366035997867584, "rewards/rejected": -0.07492075115442276, "step": 13450 }, { "epoch": 0.88, "learning_rate": 2.1381988503590578e-07, "logits/chosen": -2.0607008934020996, "logits/rejected": -2.126495599746704, "logps/chosen": -213.87832641601562, "logps/rejected": -218.08837890625, "loss": 0.6896, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.019935574382543564, "rewards/margins": 0.10359902679920197, "rewards/rejected": -0.0836634561419487, "step": 13460 }, { "epoch": 0.88, "learning_rate": 2.11515340013691e-07, "logits/chosen": -2.41133975982666, "logits/rejected": -2.3844552040100098, "logps/chosen": -228.10009765625, "logps/rejected": -219.16781616210938, "loss": 0.6883, "rewards/accuracies": 0.75, "rewards/chosen": 0.030724655836820602, "rewards/margins": 0.12647958099842072, "rewards/rejected": -0.09575492143630981, "step": 13470 }, { "epoch": 0.88, "learning_rate": 2.092227328484897e-07, "logits/chosen": -2.187948226928711, "logits/rejected": -2.1468586921691895, "logps/chosen": -202.74105834960938, "logps/rejected": -243.099609375, "loss": 0.6876, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.012676420621573925, "rewards/margins": 0.09701034426689148, "rewards/rejected": -0.08433392643928528, "step": 13480 }, { "epoch": 0.88, "learning_rate": 2.0694207549966345e-07, "logits/chosen": -2.2124342918395996, "logits/rejected": -2.0843167304992676, "logps/chosen": -208.91720581054688, "logps/rejected": -197.19125366210938, "loss": 0.6912, "rewards/accuracies": 0.625, "rewards/chosen": -8.745789818931371e-05, "rewards/margins": 0.051435112953186035, "rewards/rejected": -0.05152256414294243, "step": 13490 }, { "epoch": 0.88, "learning_rate": 2.0467337986423864e-07, "logits/chosen": -2.462228298187256, "logits/rejected": -2.1654422283172607, "logps/chosen": -302.31915283203125, "logps/rejected": -275.478759765625, "loss": 0.6912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0340435728430748, "rewards/margins": 0.06372375041246414, "rewards/rejected": -0.029680173844099045, "step": 13500 }, { "epoch": 0.88, "eval_logits/chosen": -2.326474189758301, "eval_logits/rejected": -2.1377792358398438, "eval_logps/chosen": -230.06185913085938, "eval_logps/rejected": -218.02516174316406, "eval_loss": 0.6897424459457397, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": 0.019430968910455704, "eval_rewards/margins": 0.0835636630654335, "eval_rewards/rejected": -0.0641326829791069, "eval_runtime": 712.2388, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 13500 }, { "epoch": 0.88, "learning_rate": 2.0241665777684272e-07, "logits/chosen": -2.3777573108673096, "logits/rejected": -2.2866101264953613, "logps/chosen": -265.6940002441406, "logps/rejected": -234.2218017578125, "loss": 0.6878, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.025989696383476257, "rewards/margins": 0.12143020331859589, "rewards/rejected": -0.09544049948453903, "step": 13510 }, { "epoch": 0.88, "learning_rate": 2.0017192100964366e-07, "logits/chosen": -2.0364346504211426, "logits/rejected": -2.1206259727478027, "logps/chosen": -199.53173828125, "logps/rejected": -210.08749389648438, "loss": 0.6913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.004242539405822754, "rewards/margins": 0.08438628911972046, "rewards/rejected": -0.08014374226331711, "step": 13520 }, { "epoch": 0.89, "learning_rate": 1.9793918127228777e-07, "logits/chosen": -2.411618232727051, "logits/rejected": -2.04459547996521, "logps/chosen": -314.3971862792969, "logps/rejected": -269.76873779296875, "loss": 0.688, "rewards/accuracies": 0.75, "rewards/chosen": 0.019022373482584953, "rewards/margins": 0.09453563392162323, "rewards/rejected": -0.07551325857639313, "step": 13530 }, { "epoch": 0.89, "learning_rate": 1.9571845021184005e-07, "logits/chosen": -2.174851894378662, "logits/rejected": -2.0823864936828613, "logps/chosen": -232.1296844482422, "logps/rejected": -243.97640991210938, "loss": 0.6891, "rewards/accuracies": 0.625, "rewards/chosen": -0.011437867768108845, "rewards/margins": 0.07728725671768188, "rewards/rejected": -0.08872512727975845, "step": 13540 }, { "epoch": 0.89, "learning_rate": 1.9350973941272027e-07, "logits/chosen": -2.308931350708008, "logits/rejected": -2.2825989723205566, "logps/chosen": -203.7218017578125, "logps/rejected": -191.3800506591797, "loss": 0.6893, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0005322685465216637, "rewards/margins": 0.08159051835536957, "rewards/rejected": -0.08105824887752533, "step": 13550 }, { "epoch": 0.89, "learning_rate": 1.9131306039664676e-07, "logits/chosen": -2.1821532249450684, "logits/rejected": -2.1290283203125, "logps/chosen": -198.3598175048828, "logps/rejected": -232.4375457763672, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.015442472882568836, "rewards/margins": 0.09721332788467407, "rewards/rejected": -0.08177085965871811, "step": 13560 }, { "epoch": 0.89, "learning_rate": 1.8912842462257358e-07, "logits/chosen": -2.2131478786468506, "logits/rejected": -2.144645929336548, "logps/chosen": -217.9169464111328, "logps/rejected": -212.69229125976562, "loss": 0.6869, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.022523250430822372, "rewards/margins": 0.11762279272079468, "rewards/rejected": -0.0950995534658432, "step": 13570 }, { "epoch": 0.89, "learning_rate": 1.869558434866303e-07, "logits/chosen": -2.2858431339263916, "logits/rejected": -2.352550745010376, "logps/chosen": -180.52920532226562, "logps/rejected": -207.8330078125, "loss": 0.6878, "rewards/accuracies": 0.625, "rewards/chosen": 0.0056061758659780025, "rewards/margins": 0.0927395448088646, "rewards/rejected": -0.08713337033987045, "step": 13580 }, { "epoch": 0.89, "learning_rate": 1.847953283220652e-07, "logits/chosen": -2.4596104621887207, "logits/rejected": -2.134106397628784, "logps/chosen": -252.9336700439453, "logps/rejected": -188.153076171875, "loss": 0.6859, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.03176042437553406, "rewards/margins": 0.1385561227798462, "rewards/rejected": -0.10679570585489273, "step": 13590 }, { "epoch": 0.89, "learning_rate": 1.8264689039918265e-07, "logits/chosen": -2.424584150314331, "logits/rejected": -2.084427833557129, "logps/chosen": -258.3966369628906, "logps/rejected": -238.4198760986328, "loss": 0.6905, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.014337467029690742, "rewards/margins": 0.07449439913034439, "rewards/rejected": -0.0601569339632988, "step": 13600 }, { "epoch": 0.89, "eval_logits/chosen": -2.3269259929656982, "eval_logits/rejected": -2.138150930404663, "eval_logps/chosen": -230.37106323242188, "eval_logps/rejected": -218.5099639892578, "eval_loss": 0.6897370219230652, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": 0.016339082270860672, "eval_rewards/margins": 0.08531977236270905, "eval_rewards/rejected": -0.06898068636655807, "eval_runtime": 711.7818, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 13600 }, { "epoch": 0.89, "learning_rate": 1.8051054092528857e-07, "logits/chosen": -2.351792573928833, "logits/rejected": -2.163576602935791, "logps/chosen": -257.79010009765625, "logps/rejected": -259.30328369140625, "loss": 0.6886, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.03311315178871155, "rewards/margins": 0.11273415386676788, "rewards/rejected": -0.07962099462747574, "step": 13610 }, { "epoch": 0.89, "learning_rate": 1.783862910446271e-07, "logits/chosen": -1.985805869102478, "logits/rejected": -2.1247270107269287, "logps/chosen": -171.29833984375, "logps/rejected": -174.77696228027344, "loss": 0.6884, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.01554956752806902, "rewards/margins": 0.12793493270874023, "rewards/rejected": -0.1123853549361229, "step": 13620 }, { "epoch": 0.89, "learning_rate": 1.762741518383271e-07, "logits/chosen": -2.367798328399658, "logits/rejected": -2.2053184509277344, "logps/chosen": -220.3152618408203, "logps/rejected": -201.3122100830078, "loss": 0.6882, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.015171055682003498, "rewards/margins": 0.08661060780286789, "rewards/rejected": -0.07143954932689667, "step": 13630 }, { "epoch": 0.89, "learning_rate": 1.7417413432434082e-07, "logits/chosen": -2.435668468475342, "logits/rejected": -2.041511058807373, "logps/chosen": -252.56173706054688, "logps/rejected": -206.9319305419922, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": 0.0009791527409106493, "rewards/margins": 0.07137512415647507, "rewards/rejected": -0.07039596140384674, "step": 13640 }, { "epoch": 0.89, "learning_rate": 1.7208624945738855e-07, "logits/chosen": -2.435779571533203, "logits/rejected": -2.284393310546875, "logps/chosen": -216.6245574951172, "logps/rejected": -228.45632934570312, "loss": 0.6927, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.014597500674426556, "rewards/margins": 0.04394357651472092, "rewards/rejected": -0.02934606932103634, "step": 13650 }, { "epoch": 0.89, "learning_rate": 1.7001050812889995e-07, "logits/chosen": -2.412449598312378, "logits/rejected": -2.0942466259002686, "logps/chosen": -256.7345886230469, "logps/rejected": -224.16006469726562, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.01201794296503067, "rewards/margins": 0.0767635852098465, "rewards/rejected": -0.08878152817487717, "step": 13660 }, { "epoch": 0.89, "learning_rate": 1.679469211669596e-07, "logits/chosen": -2.3339667320251465, "logits/rejected": -2.177741527557373, "logps/chosen": -224.02359008789062, "logps/rejected": -183.2321319580078, "loss": 0.6878, "rewards/accuracies": 0.625, "rewards/chosen": 0.009215526282787323, "rewards/margins": 0.10812918096780777, "rewards/rejected": -0.09891365468502045, "step": 13670 }, { "epoch": 0.9, "learning_rate": 1.6589549933624715e-07, "logits/chosen": -2.3178319931030273, "logits/rejected": -2.134033679962158, "logps/chosen": -236.70999145507812, "logps/rejected": -199.88436889648438, "loss": 0.6859, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.03497043251991272, "rewards/margins": 0.14659801125526428, "rewards/rejected": -0.11162757873535156, "step": 13680 }, { "epoch": 0.9, "learning_rate": 1.638562533379845e-07, "logits/chosen": -2.3221664428710938, "logits/rejected": -2.150352954864502, "logps/chosen": -253.377197265625, "logps/rejected": -197.33758544921875, "loss": 0.6902, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.02325398102402687, "rewards/margins": 0.062283407896757126, "rewards/rejected": -0.039029426872730255, "step": 13690 }, { "epoch": 0.9, "learning_rate": 1.6182919380987676e-07, "logits/chosen": -2.365807056427002, "logits/rejected": -2.283456325531006, "logps/chosen": -224.37783813476562, "logps/rejected": -214.51156616210938, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.013401429168879986, "rewards/margins": 0.055108923465013504, "rewards/rejected": -0.041707489639520645, "step": 13700 }, { "epoch": 0.9, "eval_logits/chosen": -2.3265719413757324, "eval_logits/rejected": -2.1378707885742188, "eval_logps/chosen": -230.2803497314453, "eval_logps/rejected": -218.3449249267578, "eval_loss": 0.6897291541099548, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": 0.017245886847376823, "eval_rewards/margins": 0.08457593619823456, "eval_rewards/rejected": -0.06733004748821259, "eval_runtime": 711.018, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.406, "step": 13700 }, { "epoch": 0.9, "learning_rate": 1.598143313260603e-07, "logits/chosen": -2.2812981605529785, "logits/rejected": -2.1852867603302, "logps/chosen": -186.65716552734375, "logps/rejected": -181.11973571777344, "loss": 0.6905, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.013965973630547523, "rewards/margins": 0.0647711455821991, "rewards/rejected": -0.05080517381429672, "step": 13710 }, { "epoch": 0.9, "learning_rate": 1.5781167639704415e-07, "logits/chosen": -2.5151546001434326, "logits/rejected": -1.9689128398895264, "logps/chosen": -329.40081787109375, "logps/rejected": -211.47140502929688, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.027660047635436058, "rewards/margins": 0.0631406158208847, "rewards/rejected": -0.0354805663228035, "step": 13720 }, { "epoch": 0.9, "learning_rate": 1.5582123946965787e-07, "logits/chosen": -2.1882617473602295, "logits/rejected": -2.0734565258026123, "logps/chosen": -231.71762084960938, "logps/rejected": -250.3955535888672, "loss": 0.6892, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.026773914694786072, "rewards/margins": 0.08530018478631973, "rewards/rejected": -0.05852626636624336, "step": 13730 }, { "epoch": 0.9, "learning_rate": 1.5384303092699504e-07, "logits/chosen": -2.391803503036499, "logits/rejected": -2.1646900177001953, "logps/chosen": -284.1597900390625, "logps/rejected": -295.56170654296875, "loss": 0.6888, "rewards/accuracies": 0.75, "rewards/chosen": 0.02824859321117401, "rewards/margins": 0.13083064556121826, "rewards/rejected": -0.10258202254772186, "step": 13740 }, { "epoch": 0.9, "learning_rate": 1.518770610883613e-07, "logits/chosen": -2.2725253105163574, "logits/rejected": -2.001561403274536, "logps/chosen": -215.4679412841797, "logps/rejected": -199.0486297607422, "loss": 0.6887, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.009293107315897942, "rewards/margins": 0.12994512915611267, "rewards/rejected": -0.13923820853233337, "step": 13750 }, { "epoch": 0.9, "learning_rate": 1.4992334020921735e-07, "logits/chosen": -2.275468111038208, "logits/rejected": -2.1718432903289795, "logps/chosen": -168.7460174560547, "logps/rejected": -151.58731079101562, "loss": 0.688, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04226404055953026, "rewards/margins": 0.12455103546380997, "rewards/rejected": -0.08228699862957001, "step": 13760 }, { "epoch": 0.9, "learning_rate": 1.4798187848112905e-07, "logits/chosen": -2.203535556793213, "logits/rejected": -2.2031960487365723, "logps/chosen": -224.80953979492188, "logps/rejected": -193.4915313720703, "loss": 0.6885, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.018388142809271812, "rewards/margins": 0.09539445489645004, "rewards/rejected": -0.113782599568367, "step": 13770 }, { "epoch": 0.9, "learning_rate": 1.460526860317113e-07, "logits/chosen": -2.3986401557922363, "logits/rejected": -2.332296848297119, "logps/chosen": -173.04156494140625, "logps/rejected": -216.95974731445312, "loss": 0.686, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0024310871958732605, "rewards/margins": 0.12171381711959839, "rewards/rejected": -0.11928270757198334, "step": 13780 }, { "epoch": 0.9, "learning_rate": 1.441357729245771e-07, "logits/chosen": -2.5229907035827637, "logits/rejected": -1.9477760791778564, "logps/chosen": -248.5333709716797, "logps/rejected": -202.63084411621094, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.0054153092205524445, "rewards/margins": 0.10084688663482666, "rewards/rejected": -0.10626220703125, "step": 13790 }, { "epoch": 0.9, "learning_rate": 1.4223114915928482e-07, "logits/chosen": -2.1714558601379395, "logits/rejected": -1.9200232028961182, "logps/chosen": -225.0757293701172, "logps/rejected": -238.7044219970703, "loss": 0.69, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.008468665182590485, "rewards/margins": 0.06861492991447449, "rewards/rejected": -0.060146261006593704, "step": 13800 }, { "epoch": 0.9, "eval_logits/chosen": -2.3265576362609863, "eval_logits/rejected": -2.137892961502075, "eval_logps/chosen": -230.2596893310547, "eval_logps/rejected": -218.37973022460938, "eval_loss": 0.6897318363189697, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": 0.017452586442232132, "eval_rewards/margins": 0.08513098210096359, "eval_rewards/rejected": -0.06767839938402176, "eval_runtime": 712.4298, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 13800 }, { "epoch": 0.9, "learning_rate": 1.403388246712842e-07, "logits/chosen": -2.2311573028564453, "logits/rejected": -1.99333918094635, "logps/chosen": -164.9827423095703, "logps/rejected": -163.3975830078125, "loss": 0.6903, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.00731696467846632, "rewards/margins": 0.055666130036115646, "rewards/rejected": -0.06298309564590454, "step": 13810 }, { "epoch": 0.9, "learning_rate": 1.3845880933186757e-07, "logits/chosen": -2.4922029972076416, "logits/rejected": -2.229177951812744, "logps/chosen": -237.0489959716797, "logps/rejected": -206.5233154296875, "loss": 0.6922, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.01689792238175869, "rewards/margins": 0.037102360278367996, "rewards/rejected": -0.020204436033964157, "step": 13820 }, { "epoch": 0.9, "learning_rate": 1.3659111294811457e-07, "logits/chosen": -2.3271474838256836, "logits/rejected": -2.1840929985046387, "logps/chosen": -193.53775024414062, "logps/rejected": -181.85487365722656, "loss": 0.6909, "rewards/accuracies": 0.625, "rewards/chosen": -0.016945457085967064, "rewards/margins": 0.062133751809597015, "rewards/rejected": -0.07907922565937042, "step": 13830 }, { "epoch": 0.91, "learning_rate": 1.347357452628459e-07, "logits/chosen": -2.4782280921936035, "logits/rejected": -2.344364643096924, "logps/chosen": -241.06307983398438, "logps/rejected": -240.8955078125, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.04234471544623375, "rewards/margins": 0.07334139943122864, "rewards/rejected": -0.030996689572930336, "step": 13840 }, { "epoch": 0.91, "learning_rate": 1.3289271595456732e-07, "logits/chosen": -2.2512059211730957, "logits/rejected": -2.064351797103882, "logps/chosen": -204.22561645507812, "logps/rejected": -189.5396270751953, "loss": 0.6878, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0010356527054682374, "rewards/margins": 0.1110767275094986, "rewards/rejected": -0.1100410670042038, "step": 13850 }, { "epoch": 0.91, "learning_rate": 1.310620346374228e-07, "logits/chosen": -2.1624104976654053, "logits/rejected": -2.0331850051879883, "logps/chosen": -229.8220672607422, "logps/rejected": -205.7158660888672, "loss": 0.6867, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0239988062530756, "rewards/margins": 0.12696941196918488, "rewards/rejected": -0.10297061502933502, "step": 13860 }, { "epoch": 0.91, "learning_rate": 1.2924371086114274e-07, "logits/chosen": -2.213139057159424, "logits/rejected": -1.9702332019805908, "logps/chosen": -234.7813262939453, "logps/rejected": -237.0749053955078, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01280174870043993, "rewards/margins": 0.0832042545080185, "rewards/rejected": -0.07040251046419144, "step": 13870 }, { "epoch": 0.91, "learning_rate": 1.274377541109953e-07, "logits/chosen": -2.175238847732544, "logits/rejected": -2.2496845722198486, "logps/chosen": -163.90567016601562, "logps/rejected": -248.86221313476562, "loss": 0.6888, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.004448303487151861, "rewards/margins": 0.07032819092273712, "rewards/rejected": -0.06587988883256912, "step": 13880 }, { "epoch": 0.91, "learning_rate": 1.2564417380773435e-07, "logits/chosen": -2.1063437461853027, "logits/rejected": -1.9700400829315186, "logps/chosen": -177.04061889648438, "logps/rejected": -210.00146484375, "loss": 0.6885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.014564013108611107, "rewards/margins": 0.10478832572698593, "rewards/rejected": -0.09022431075572968, "step": 13890 }, { "epoch": 0.91, "learning_rate": 1.2386297930755436e-07, "logits/chosen": -2.3223581314086914, "logits/rejected": -2.3294119834899902, "logps/chosen": -249.2999267578125, "logps/rejected": -253.54208374023438, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.014141452498733997, "rewards/margins": 0.0906859040260315, "rewards/rejected": -0.10482735931873322, "step": 13900 }, { "epoch": 0.91, "eval_logits/chosen": -2.3256688117980957, "eval_logits/rejected": -2.137094497680664, "eval_logps/chosen": -230.19509887695312, "eval_logps/rejected": -218.29586791992188, "eval_loss": 0.6897297501564026, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": 0.01809842139482498, "eval_rewards/margins": 0.08493825048208237, "eval_rewards/rejected": -0.06683983653783798, "eval_runtime": 714.4928, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.4, "step": 13900 }, { "epoch": 0.91, "learning_rate": 1.220941799020378e-07, "logits/chosen": -2.109891176223755, "logits/rejected": -2.0245959758758545, "logps/chosen": -217.385498046875, "logps/rejected": -203.8630828857422, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.016956061124801636, "rewards/margins": 0.08830462396144867, "rewards/rejected": -0.07134857028722763, "step": 13910 }, { "epoch": 0.91, "learning_rate": 1.2033778481810975e-07, "logits/chosen": -2.38193416595459, "logits/rejected": -2.114203453063965, "logps/chosen": -217.64212036132812, "logps/rejected": -189.1055145263672, "loss": 0.6876, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.02351401373744011, "rewards/margins": 0.08809584379196167, "rewards/rejected": -0.06458182632923126, "step": 13920 }, { "epoch": 0.91, "learning_rate": 1.1859380321798591e-07, "logits/chosen": -2.3214306831359863, "logits/rejected": -2.388051748275757, "logps/chosen": -200.78286743164062, "logps/rejected": -222.736083984375, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.017784133553504944, "rewards/margins": 0.07208283245563507, "rewards/rejected": -0.05429869890213013, "step": 13930 }, { "epoch": 0.91, "learning_rate": 1.1686224419912989e-07, "logits/chosen": -2.2252655029296875, "logits/rejected": -2.0139126777648926, "logps/chosen": -251.2513885498047, "logps/rejected": -235.0546112060547, "loss": 0.6871, "rewards/accuracies": 0.75, "rewards/chosen": 0.016212433576583862, "rewards/margins": 0.13094016909599304, "rewards/rejected": -0.11472772061824799, "step": 13940 }, { "epoch": 0.91, "learning_rate": 1.1514311679420104e-07, "logits/chosen": -2.0391550064086914, "logits/rejected": -2.1126651763916016, "logps/chosen": -154.56124877929688, "logps/rejected": -220.6207733154297, "loss": 0.6874, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.005872879642993212, "rewards/margins": 0.10239820182323456, "rewards/rejected": -0.09652532637119293, "step": 13950 }, { "epoch": 0.91, "learning_rate": 1.1343642997101029e-07, "logits/chosen": -2.3179874420166016, "logits/rejected": -2.2247185707092285, "logps/chosen": -199.3680877685547, "logps/rejected": -196.05459594726562, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.029852483421564102, "rewards/margins": 0.09908358752727509, "rewards/rejected": -0.06923110783100128, "step": 13960 }, { "epoch": 0.91, "learning_rate": 1.1174219263247188e-07, "logits/chosen": -2.0668439865112305, "logits/rejected": -1.9567053318023682, "logps/chosen": -198.8998260498047, "logps/rejected": -190.6134033203125, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.000822742294985801, "rewards/margins": 0.0924471914768219, "rewards/rejected": -0.09326992928981781, "step": 13970 }, { "epoch": 0.91, "learning_rate": 1.1006041361655839e-07, "logits/chosen": -2.4929490089416504, "logits/rejected": -2.028357982635498, "logps/chosen": -208.04507446289062, "logps/rejected": -175.75112915039062, "loss": 0.6898, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.003921913914382458, "rewards/margins": 0.07643552869558334, "rewards/rejected": -0.07251361012458801, "step": 13980 }, { "epoch": 0.92, "learning_rate": 1.0839110169625189e-07, "logits/chosen": -2.05533504486084, "logits/rejected": -2.352057933807373, "logps/chosen": -205.25399780273438, "logps/rejected": -211.28079223632812, "loss": 0.6875, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.015882687643170357, "rewards/margins": 0.133821040391922, "rewards/rejected": -0.1179383248090744, "step": 13990 }, { "epoch": 0.92, "learning_rate": 1.06734265579502e-07, "logits/chosen": -2.337198495864868, "logits/rejected": -2.033975124359131, "logps/chosen": -262.0823974609375, "logps/rejected": -206.0341796875, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.005657001864165068, "rewards/margins": 0.10456991195678711, "rewards/rejected": -0.11022691428661346, "step": 14000 }, { "epoch": 0.92, "eval_logits/chosen": -2.326231002807617, "eval_logits/rejected": -2.137554168701172, "eval_logps/chosen": -230.58172607421875, "eval_logps/rejected": -218.70066833496094, "eval_loss": 0.689732551574707, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": 0.01423216424882412, "eval_rewards/margins": 0.0851198136806488, "eval_rewards/rejected": -0.07088765501976013, "eval_runtime": 712.0104, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.404, "step": 14000 }, { "epoch": 0.92, "learning_rate": 1.050899139091771e-07, "logits/chosen": -2.416721820831299, "logits/rejected": -2.0669798851013184, "logps/chosen": -279.1082763671875, "logps/rejected": -241.64761352539062, "loss": 0.6898, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0048080976121127605, "rewards/margins": 0.09057492017745972, "rewards/rejected": -0.09538300335407257, "step": 14010 }, { "epoch": 0.92, "learning_rate": 1.0345805526302072e-07, "logits/chosen": -2.242600917816162, "logits/rejected": -2.3467628955841064, "logps/chosen": -198.83761596679688, "logps/rejected": -201.98965454101562, "loss": 0.6904, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.023247262462973595, "rewards/margins": 0.10389737784862518, "rewards/rejected": -0.08065011352300644, "step": 14020 }, { "epoch": 0.92, "learning_rate": 1.0183869815360764e-07, "logits/chosen": -2.231644868850708, "logits/rejected": -2.350635290145874, "logps/chosen": -189.2916717529297, "logps/rejected": -228.9336700439453, "loss": 0.6914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02621796727180481, "rewards/margins": 0.07001911103725433, "rewards/rejected": -0.04380114749073982, "step": 14030 }, { "epoch": 0.92, "learning_rate": 1.0023185102829763e-07, "logits/chosen": -2.0455517768859863, "logits/rejected": -2.2643027305603027, "logps/chosen": -228.81332397460938, "logps/rejected": -242.60061645507812, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.020088955760002136, "rewards/margins": 0.09602537006139755, "rewards/rejected": -0.07593640685081482, "step": 14040 }, { "epoch": 0.92, "learning_rate": 9.863752226919182e-08, "logits/chosen": -2.2488272190093994, "logits/rejected": -1.7611221075057983, "logps/chosen": -235.35617065429688, "logps/rejected": -182.02853393554688, "loss": 0.6864, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02376001887023449, "rewards/margins": 0.12250945717096329, "rewards/rejected": -0.09874944388866425, "step": 14050 }, { "epoch": 0.92, "learning_rate": 9.705572019309107e-08, "logits/chosen": -2.169804096221924, "logits/rejected": -2.2068305015563965, "logps/chosen": -266.7760925292969, "logps/rejected": -246.24984741210938, "loss": 0.6878, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.012513126246631145, "rewards/margins": 0.10957686603069305, "rewards/rejected": -0.09706376492977142, "step": 14060 }, { "epoch": 0.92, "learning_rate": 9.548645305144849e-08, "logits/chosen": -2.3847720623016357, "logits/rejected": -2.2535669803619385, "logps/chosen": -171.47378540039062, "logps/rejected": -184.33763122558594, "loss": 0.6876, "rewards/accuracies": 0.5, "rewards/chosen": 0.020213961601257324, "rewards/margins": 0.08890150487422943, "rewards/rejected": -0.0686875432729721, "step": 14070 }, { "epoch": 0.92, "learning_rate": 9.392972903033149e-08, "logits/chosen": -2.306051254272461, "logits/rejected": -2.159519672393799, "logps/chosen": -222.64682006835938, "logps/rejected": -218.47372436523438, "loss": 0.6924, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.004844355396926403, "rewards/margins": 0.03370397537946701, "rewards/rejected": -0.028859620913863182, "step": 14080 }, { "epoch": 0.92, "learning_rate": 9.238555625037449e-08, "logits/chosen": -2.328538417816162, "logits/rejected": -2.1120870113372803, "logps/chosen": -188.9663848876953, "logps/rejected": -166.45822143554688, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.017418090254068375, "rewards/margins": 0.0717364102602005, "rewards/rejected": -0.05431831628084183, "step": 14090 }, { "epoch": 0.92, "learning_rate": 9.085394276673903e-08, "logits/chosen": -2.3301329612731934, "logits/rejected": -2.0303778648376465, "logps/chosen": -267.52117919921875, "logps/rejected": -261.9759216308594, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": 0.006449407432228327, "rewards/margins": 0.0890425592660904, "rewards/rejected": -0.0825931578874588, "step": 14100 }, { "epoch": 0.92, "eval_logits/chosen": -2.325167417526245, "eval_logits/rejected": -2.1365697383880615, "eval_logps/chosen": -230.42181396484375, "eval_logps/rejected": -218.4661865234375, "eval_loss": 0.6897357106208801, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": 0.0158314798027277, "eval_rewards/margins": 0.08437444269657135, "eval_rewards/rejected": -0.0685429498553276, "eval_runtime": 710.8403, "eval_samples_per_second": 2.814, "eval_steps_per_second": 1.407, "step": 14100 }, { "epoch": 0.92, "learning_rate": 8.933489656907157e-08, "logits/chosen": -2.308310031890869, "logits/rejected": -2.2029194831848145, "logps/chosen": -217.09829711914062, "logps/rejected": -247.48666381835938, "loss": 0.6915, "rewards/accuracies": 0.5, "rewards/chosen": 0.00237136147916317, "rewards/margins": 0.05447987839579582, "rewards/rejected": -0.0521085187792778, "step": 14110 }, { "epoch": 0.92, "learning_rate": 8.782842558146127e-08, "logits/chosen": -2.3630995750427246, "logits/rejected": -2.2901546955108643, "logps/chosen": -159.1778564453125, "logps/rejected": -167.31874084472656, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02706066146492958, "rewards/margins": 0.09847380220890045, "rewards/rejected": -0.07141314446926117, "step": 14120 }, { "epoch": 0.92, "learning_rate": 8.633453766239836e-08, "logits/chosen": -2.415795087814331, "logits/rejected": -2.186148166656494, "logps/chosen": -230.60061645507812, "logps/rejected": -205.28359985351562, "loss": 0.6916, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04520539939403534, "rewards/margins": 0.06538231670856476, "rewards/rejected": -0.020176919177174568, "step": 14130 }, { "epoch": 0.93, "learning_rate": 8.485324060473448e-08, "logits/chosen": -2.249516248703003, "logits/rejected": -2.1395606994628906, "logps/chosen": -237.13107299804688, "logps/rejected": -232.41909790039062, "loss": 0.6906, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.031510137021541595, "rewards/margins": 0.07678806781768799, "rewards/rejected": -0.04527793079614639, "step": 14140 }, { "epoch": 0.93, "learning_rate": 8.338454213564052e-08, "logits/chosen": -2.291496753692627, "logits/rejected": -2.0189805030822754, "logps/chosen": -230.54672241210938, "logps/rejected": -223.07864379882812, "loss": 0.6895, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0035024010576307774, "rewards/margins": 0.09726149588823318, "rewards/rejected": -0.09375908970832825, "step": 14150 }, { "epoch": 0.93, "learning_rate": 8.192844991656679e-08, "logits/chosen": -2.2867093086242676, "logits/rejected": -2.0489730834960938, "logps/chosen": -237.1662139892578, "logps/rejected": -208.5237579345703, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.015504756942391396, "rewards/margins": 0.07765550911426544, "rewards/rejected": -0.0621507465839386, "step": 14160 }, { "epoch": 0.93, "learning_rate": 8.048497154320434e-08, "logits/chosen": -2.3233590126037598, "logits/rejected": -2.3688528537750244, "logps/chosen": -130.73397827148438, "logps/rejected": -147.06256103515625, "loss": 0.6895, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.011844434775412083, "rewards/margins": 0.07231110334396362, "rewards/rejected": -0.08415552228689194, "step": 14170 }, { "epoch": 0.93, "learning_rate": 7.905411454544265e-08, "logits/chosen": -2.324502468109131, "logits/rejected": -2.174683094024658, "logps/chosen": -236.509765625, "logps/rejected": -246.95889282226562, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -5.5506454373244196e-05, "rewards/margins": 0.057068269699811935, "rewards/rejected": -0.057123780250549316, "step": 14180 }, { "epoch": 0.93, "learning_rate": 7.763588638733332e-08, "logits/chosen": -2.3418097496032715, "logits/rejected": -2.2953555583953857, "logps/chosen": -260.10223388671875, "logps/rejected": -249.2476348876953, "loss": 0.6883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.023179076611995697, "rewards/margins": 0.08969531953334808, "rewards/rejected": -0.06651624292135239, "step": 14190 }, { "epoch": 0.93, "learning_rate": 7.623029446704899e-08, "logits/chosen": -2.1991162300109863, "logits/rejected": -2.3756251335144043, "logps/chosen": -308.20355224609375, "logps/rejected": -282.3921813964844, "loss": 0.6894, "rewards/accuracies": 0.75, "rewards/chosen": 0.03237896040081978, "rewards/margins": 0.11083276569843292, "rewards/rejected": -0.07845381647348404, "step": 14200 }, { "epoch": 0.93, "eval_logits/chosen": -2.325500965118408, "eval_logits/rejected": -2.1368813514709473, "eval_logps/chosen": -230.51712036132812, "eval_logps/rejected": -218.59410095214844, "eval_loss": 0.6897341012954712, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": 0.014878012239933014, "eval_rewards/margins": 0.08469977974891663, "eval_rewards/rejected": -0.06982176750898361, "eval_runtime": 711.1188, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 14200 }, { "epoch": 0.93, "learning_rate": 7.483734611684557e-08, "logits/chosen": -2.124391555786133, "logits/rejected": -1.9591686725616455, "logps/chosen": -254.62777709960938, "logps/rejected": -209.1081085205078, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": 0.022580375894904137, "rewards/margins": 0.08641272783279419, "rewards/rejected": -0.0638323426246643, "step": 14210 }, { "epoch": 0.93, "learning_rate": 7.345704860302366e-08, "logits/chosen": -2.399385690689087, "logits/rejected": -2.3872694969177246, "logps/chosen": -246.51803588867188, "logps/rejected": -255.66213989257812, "loss": 0.6897, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.017346328124403954, "rewards/margins": 0.09822475165128708, "rewards/rejected": -0.08087843656539917, "step": 14220 }, { "epoch": 0.93, "learning_rate": 7.208940912589224e-08, "logits/chosen": -2.334130048751831, "logits/rejected": -1.9988048076629639, "logps/chosen": -210.1862335205078, "logps/rejected": -185.14744567871094, "loss": 0.6858, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0059993029572069645, "rewards/margins": 0.12339667230844498, "rewards/rejected": -0.1293959617614746, "step": 14230 }, { "epoch": 0.93, "learning_rate": 7.073443481972753e-08, "logits/chosen": -2.1471316814422607, "logits/rejected": -2.125434637069702, "logps/chosen": -184.0389404296875, "logps/rejected": -211.4552459716797, "loss": 0.6878, "rewards/accuracies": 0.625, "rewards/chosen": 0.0011679657036438584, "rewards/margins": 0.07914597541093826, "rewards/rejected": -0.07797800749540329, "step": 14240 }, { "epoch": 0.93, "learning_rate": 6.939213275274027e-08, "logits/chosen": -2.2806684970855713, "logits/rejected": -2.240402936935425, "logps/chosen": -238.58798217773438, "logps/rejected": -225.0943603515625, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": 0.006657888181507587, "rewards/margins": 0.06109069660305977, "rewards/rejected": -0.054432809352874756, "step": 14250 }, { "epoch": 0.93, "learning_rate": 6.806250992703461e-08, "logits/chosen": -2.3053228855133057, "logits/rejected": -2.15181303024292, "logps/chosen": -214.410400390625, "logps/rejected": -195.6936798095703, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.022841984406113625, "rewards/margins": 0.07493428885936737, "rewards/rejected": -0.0520923025906086, "step": 14260 }, { "epoch": 0.93, "learning_rate": 6.674557327857572e-08, "logits/chosen": -2.2985949516296387, "logits/rejected": -2.311959743499756, "logps/chosen": -251.68814086914062, "logps/rejected": -254.1820831298828, "loss": 0.6867, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.025825273245573044, "rewards/margins": 0.11581947654485703, "rewards/rejected": -0.08999422192573547, "step": 14270 }, { "epoch": 0.93, "learning_rate": 6.544132967714917e-08, "logits/chosen": -2.0479283332824707, "logits/rejected": -2.0568835735321045, "logps/chosen": -246.87765502929688, "logps/rejected": -244.86984252929688, "loss": 0.688, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.00017667413339950144, "rewards/margins": 0.11704652011394501, "rewards/rejected": -0.11722320318222046, "step": 14280 }, { "epoch": 0.93, "learning_rate": 6.414978592632932e-08, "logits/chosen": -2.3786206245422363, "logits/rejected": -1.9369407892227173, "logps/chosen": -260.21160888671875, "logps/rejected": -222.98580932617188, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": 0.008877063170075417, "rewards/margins": 0.09229589253664017, "rewards/rejected": -0.0834188237786293, "step": 14290 }, { "epoch": 0.94, "learning_rate": 6.287094876344046e-08, "logits/chosen": -2.3294384479522705, "logits/rejected": -2.362797260284424, "logps/chosen": -168.59146118164062, "logps/rejected": -183.0385284423828, "loss": 0.6912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03828797861933708, "rewards/margins": 0.07808025181293488, "rewards/rejected": -0.0397922620177269, "step": 14300 }, { "epoch": 0.94, "eval_logits/chosen": -2.325085163116455, "eval_logits/rejected": -2.1365013122558594, "eval_logps/chosen": -230.55078125, "eval_logps/rejected": -218.63143920898438, "eval_loss": 0.6897284388542175, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": 0.014541618525981903, "eval_rewards/margins": 0.08473705500364304, "eval_rewards/rejected": -0.07019543647766113, "eval_runtime": 710.3597, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.408, "step": 14300 }, { "epoch": 0.94, "learning_rate": 6.160482485952413e-08, "logits/chosen": -2.46873140335083, "logits/rejected": -2.203112840652466, "logps/chosen": -236.5559539794922, "logps/rejected": -211.58816528320312, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.001061995280906558, "rewards/margins": 0.0682806596159935, "rewards/rejected": -0.06934265792369843, "step": 14310 }, { "epoch": 0.94, "learning_rate": 6.035142081930234e-08, "logits/chosen": -2.333582639694214, "logits/rejected": -1.9607412815093994, "logps/chosen": -263.0543518066406, "logps/rejected": -192.89089965820312, "loss": 0.6906, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.002580016851425171, "rewards/margins": 0.0685344785451889, "rewards/rejected": -0.07111448794603348, "step": 14320 }, { "epoch": 0.94, "learning_rate": 5.911074318114496e-08, "logits/chosen": -2.16135311126709, "logits/rejected": -2.2769253253936768, "logps/chosen": -202.2639617919922, "logps/rejected": -251.79052734375, "loss": 0.6907, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01324182003736496, "rewards/margins": 0.07401735335588455, "rewards/rejected": -0.060775529593229294, "step": 14330 }, { "epoch": 0.94, "learning_rate": 5.788279841703381e-08, "logits/chosen": -2.3670153617858887, "logits/rejected": -2.1148641109466553, "logps/chosen": -182.29180908203125, "logps/rejected": -184.1455535888672, "loss": 0.6891, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.01617772877216339, "rewards/margins": 0.09520837664604187, "rewards/rejected": -0.07903064042329788, "step": 14340 }, { "epoch": 0.94, "learning_rate": 5.66675929325311e-08, "logits/chosen": -2.369096279144287, "logits/rejected": -2.1264519691467285, "logps/chosen": -218.7493438720703, "logps/rejected": -212.5388641357422, "loss": 0.6913, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.019151246175169945, "rewards/margins": 0.05138329789042473, "rewards/rejected": -0.032232046127319336, "step": 14350 }, { "epoch": 0.94, "learning_rate": 5.546513306674301e-08, "logits/chosen": -2.2958080768585205, "logits/rejected": -1.9007478952407837, "logps/chosen": -276.90521240234375, "logps/rejected": -211.9548797607422, "loss": 0.6883, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.016114667057991028, "rewards/margins": 0.10132592916488647, "rewards/rejected": -0.08521126955747604, "step": 14360 }, { "epoch": 0.94, "learning_rate": 5.4275425092290004e-08, "logits/chosen": -2.4178357124328613, "logits/rejected": -2.370293140411377, "logps/chosen": -252.72412109375, "logps/rejected": -242.9341278076172, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.020720353350043297, "rewards/margins": 0.08652675151824951, "rewards/rejected": -0.06580640375614166, "step": 14370 }, { "epoch": 0.94, "learning_rate": 5.309847521527078e-08, "logits/chosen": -2.2703206539154053, "logits/rejected": -1.9016317129135132, "logps/chosen": -283.227294921875, "logps/rejected": -247.84408569335938, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.013460688292980194, "rewards/margins": 0.07545115798711777, "rewards/rejected": -0.06199047714471817, "step": 14380 }, { "epoch": 0.94, "learning_rate": 5.1934289575233385e-08, "logits/chosen": -2.147324323654175, "logits/rejected": -1.8097765445709229, "logps/chosen": -240.4329071044922, "logps/rejected": -215.96237182617188, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.009034966118633747, "rewards/margins": 0.09896841645240784, "rewards/rejected": -0.10800337791442871, "step": 14390 }, { "epoch": 0.94, "learning_rate": 5.078287424513994e-08, "logits/chosen": -2.390627384185791, "logits/rejected": -2.2773404121398926, "logps/chosen": -270.3782653808594, "logps/rejected": -206.30615234375, "loss": 0.6893, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.003305424703285098, "rewards/margins": 0.10758145898580551, "rewards/rejected": -0.11088689416646957, "step": 14400 }, { "epoch": 0.94, "eval_logits/chosen": -2.3247244358062744, "eval_logits/rejected": -2.1361277103424072, "eval_logps/chosen": -230.61825561523438, "eval_logps/rejected": -218.7084503173828, "eval_loss": 0.6897271871566772, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": 0.013867066241800785, "eval_rewards/margins": 0.08483249694108963, "eval_rewards/rejected": -0.07096543163061142, "eval_runtime": 710.9987, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.406, "step": 14400 }, { "epoch": 0.94, "learning_rate": 4.964423523133671e-08, "logits/chosen": -2.3970162868499756, "logits/rejected": -2.158749580383301, "logps/chosen": -215.6207275390625, "logps/rejected": -187.32325744628906, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": 0.021107520908117294, "rewards/margins": 0.060516953468322754, "rewards/rejected": -0.03940943256020546, "step": 14410 }, { "epoch": 0.94, "learning_rate": 4.8518378473522976e-08, "logits/chosen": -2.262331008911133, "logits/rejected": -2.106858253479004, "logps/chosen": -250.4281768798828, "logps/rejected": -251.893310546875, "loss": 0.6871, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.000816689629573375, "rewards/margins": 0.08192334324121475, "rewards/rejected": -0.08110664784908295, "step": 14420 }, { "epoch": 0.94, "learning_rate": 4.7405309844718584e-08, "logits/chosen": -2.184021472930908, "logits/rejected": -2.0803000926971436, "logps/chosen": -192.67074584960938, "logps/rejected": -213.4363250732422, "loss": 0.6872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0017172780353575945, "rewards/margins": 0.11569315195083618, "rewards/rejected": -0.11741043627262115, "step": 14430 }, { "epoch": 0.94, "learning_rate": 4.630503515123508e-08, "logits/chosen": -2.426945209503174, "logits/rejected": -2.128527879714966, "logps/chosen": -200.57327270507812, "logps/rejected": -160.0536651611328, "loss": 0.6882, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0029270625673234463, "rewards/margins": 0.09346749633550644, "rewards/rejected": -0.09054042398929596, "step": 14440 }, { "epoch": 0.95, "learning_rate": 4.5217560132644056e-08, "logits/chosen": -2.2494208812713623, "logits/rejected": -2.1792380809783936, "logps/chosen": -148.93338012695312, "logps/rejected": -174.1099395751953, "loss": 0.6905, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.016862403601408005, "rewards/margins": 0.06775657087564468, "rewards/rejected": -0.05089417099952698, "step": 14450 }, { "epoch": 0.95, "learning_rate": 4.41428904617483e-08, "logits/chosen": -2.2892022132873535, "logits/rejected": -2.2819294929504395, "logps/chosen": -177.44277954101562, "logps/rejected": -189.7292938232422, "loss": 0.6925, "rewards/accuracies": 0.5, "rewards/chosen": -0.0033408640883862972, "rewards/margins": 0.07625994831323624, "rewards/rejected": -0.07960081100463867, "step": 14460 }, { "epoch": 0.95, "learning_rate": 4.3081031744550696e-08, "logits/chosen": -2.3867905139923096, "logits/rejected": -2.3108649253845215, "logps/chosen": -250.4009552001953, "logps/rejected": -236.97607421875, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.027443695813417435, "rewards/margins": 0.08902832120656967, "rewards/rejected": -0.061584629118442535, "step": 14470 }, { "epoch": 0.95, "learning_rate": 4.2031989520227025e-08, "logits/chosen": -2.3677258491516113, "logits/rejected": -2.1632471084594727, "logps/chosen": -225.92098999023438, "logps/rejected": -210.75198364257812, "loss": 0.6907, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.023934394121170044, "rewards/margins": 0.06718473136425018, "rewards/rejected": -0.04325033351778984, "step": 14480 }, { "epoch": 0.95, "learning_rate": 4.099576926109461e-08, "logits/chosen": -2.445075273513794, "logits/rejected": -1.9394609928131104, "logps/chosen": -237.6958465576172, "logps/rejected": -165.7293243408203, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": 0.0208599753677845, "rewards/margins": 0.07187938690185547, "rewards/rejected": -0.05101940780878067, "step": 14490 }, { "epoch": 0.95, "learning_rate": 3.997237637258705e-08, "logits/chosen": -2.277336835861206, "logits/rejected": -2.330341339111328, "logps/chosen": -313.8475341796875, "logps/rejected": -279.31109619140625, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03405776619911194, "rewards/margins": 0.08806699514389038, "rewards/rejected": -0.05400923639535904, "step": 14500 }, { "epoch": 0.95, "eval_logits/chosen": -2.3249900341033936, "eval_logits/rejected": -2.136406660079956, "eval_logps/chosen": -230.61793518066406, "eval_logps/rejected": -218.70700073242188, "eval_loss": 0.6897269487380981, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": 0.013869978487491608, "eval_rewards/margins": 0.08482073247432709, "eval_rewards/rejected": -0.07095075398683548, "eval_runtime": 710.297, "eval_samples_per_second": 2.816, "eval_steps_per_second": 1.408, "step": 14500 }, { "epoch": 0.95, "learning_rate": 3.8961816193222035e-08, "logits/chosen": -2.4175612926483154, "logits/rejected": -2.200261354446411, "logps/chosen": -237.2295379638672, "logps/rejected": -184.6630096435547, "loss": 0.6922, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.013792415149509907, "rewards/margins": 0.04766743257641792, "rewards/rejected": -0.061459846794605255, "step": 14510 }, { "epoch": 0.95, "learning_rate": 3.79640939945769e-08, "logits/chosen": -2.3744473457336426, "logits/rejected": -2.240773916244507, "logps/chosen": -283.0396728515625, "logps/rejected": -200.9557647705078, "loss": 0.6922, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03181237354874611, "rewards/margins": 0.04180184006690979, "rewards/rejected": -0.009989465586841106, "step": 14520 }, { "epoch": 0.95, "learning_rate": 3.697921498125895e-08, "logits/chosen": -2.109978199005127, "logits/rejected": -2.2066032886505127, "logps/chosen": -211.96316528320312, "logps/rejected": -225.83584594726562, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": -0.0201013945043087, "rewards/margins": 0.07992889732122421, "rewards/rejected": -0.10003030300140381, "step": 14530 }, { "epoch": 0.95, "learning_rate": 3.6007184290880456e-08, "logits/chosen": -2.3271656036376953, "logits/rejected": -2.227792739868164, "logps/chosen": -211.0209197998047, "logps/rejected": -203.0982666015625, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.009520738385617733, "rewards/margins": 0.08740987628698349, "rewards/rejected": -0.07788912951946259, "step": 14540 }, { "epoch": 0.95, "learning_rate": 3.504800699402872e-08, "logits/chosen": -2.557425022125244, "logits/rejected": -2.2593464851379395, "logps/chosen": -342.3351135253906, "logps/rejected": -273.93585205078125, "loss": 0.6919, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.03319120034575462, "rewards/margins": 0.04967629909515381, "rewards/rejected": -0.016485098749399185, "step": 14550 }, { "epoch": 0.95, "learning_rate": 3.4101688094242967e-08, "logits/chosen": -2.264317035675049, "logits/rejected": -2.1588878631591797, "logps/chosen": -270.6173095703125, "logps/rejected": -257.766357421875, "loss": 0.6898, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.010563389398157597, "rewards/margins": 0.12325240671634674, "rewards/rejected": -0.13381578028202057, "step": 14560 }, { "epoch": 0.95, "learning_rate": 3.3168232527985564e-08, "logits/chosen": -2.2050108909606934, "logits/rejected": -1.9072158336639404, "logps/chosen": -241.6295166015625, "logps/rejected": -194.75392150878906, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.00010975040640914813, "rewards/margins": 0.07874272018671036, "rewards/rejected": -0.07885247468948364, "step": 14570 }, { "epoch": 0.95, "learning_rate": 3.224764516461892e-08, "logits/chosen": -2.3158745765686035, "logits/rejected": -2.1368792057037354, "logps/chosen": -252.09890747070312, "logps/rejected": -232.61062622070312, "loss": 0.6878, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.028999557718634605, "rewards/margins": 0.1126040369272232, "rewards/rejected": -0.08360447734594345, "step": 14580 }, { "epoch": 0.95, "learning_rate": 3.133993080637665e-08, "logits/chosen": -2.2999939918518066, "logits/rejected": -2.108703374862671, "logps/chosen": -201.35792541503906, "logps/rejected": -199.75430297851562, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.015230001881718636, "rewards/margins": 0.10807321965694427, "rewards/rejected": -0.09284321218729019, "step": 14590 }, { "epoch": 0.96, "learning_rate": 3.0445094188342186e-08, "logits/chosen": -2.112121343612671, "logits/rejected": -1.8166097402572632, "logps/chosen": -253.4682159423828, "logps/rejected": -182.1717987060547, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 8.347779657924548e-05, "rewards/margins": 0.08967778086662292, "rewards/rejected": -0.0895942971110344, "step": 14600 }, { "epoch": 0.96, "eval_logits/chosen": -2.3248980045318604, "eval_logits/rejected": -2.1363399028778076, "eval_logps/chosen": -230.62680053710938, "eval_logps/rejected": -218.67767333984375, "eval_loss": 0.6897242665290833, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": 0.013781617395579815, "eval_rewards/margins": 0.08443937450647354, "eval_rewards/rejected": -0.07065775245428085, "eval_runtime": 710.5131, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 14600 }, { "epoch": 0.96, "learning_rate": 2.9563139978421028e-08, "logits/chosen": -2.2371926307678223, "logits/rejected": -2.259657382965088, "logps/chosen": -223.68856811523438, "logps/rejected": -221.4380340576172, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.021833080798387527, "rewards/margins": 0.05533973500132561, "rewards/rejected": -0.03350665792822838, "step": 14610 }, { "epoch": 0.96, "learning_rate": 2.869407277731939e-08, "logits/chosen": -2.2002997398376465, "logits/rejected": -2.1275038719177246, "logps/chosen": -183.22903442382812, "logps/rejected": -168.91224670410156, "loss": 0.6902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02028103545308113, "rewards/margins": 0.08852804452180862, "rewards/rejected": -0.06824701279401779, "step": 14620 }, { "epoch": 0.96, "learning_rate": 2.783789711851642e-08, "logits/chosen": -2.3309357166290283, "logits/rejected": -2.1030194759368896, "logps/chosen": -157.8726043701172, "logps/rejected": -147.86341857910156, "loss": 0.6878, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.03978399187326431, "rewards/margins": 0.12423492968082428, "rewards/rejected": -0.08445094525814056, "step": 14630 }, { "epoch": 0.96, "learning_rate": 2.6994617468244778e-08, "logits/chosen": -2.4272868633270264, "logits/rejected": -1.9481617212295532, "logps/chosen": -210.47348022460938, "logps/rejected": -158.01644897460938, "loss": 0.689, "rewards/accuracies": 0.75, "rewards/chosen": 0.02547174133360386, "rewards/margins": 0.11461669206619263, "rewards/rejected": -0.08914494514465332, "step": 14640 }, { "epoch": 0.96, "learning_rate": 2.6164238225463155e-08, "logits/chosen": -2.264310598373413, "logits/rejected": -1.9083404541015625, "logps/chosen": -281.5527648925781, "logps/rejected": -212.22207641601562, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0087438328191638, "rewards/margins": 0.09177269041538239, "rewards/rejected": -0.08302884548902512, "step": 14650 }, { "epoch": 0.96, "learning_rate": 2.534676372183742e-08, "logits/chosen": -2.288485527038574, "logits/rejected": -2.1355769634246826, "logps/chosen": -282.95892333984375, "logps/rejected": -237.60372924804688, "loss": 0.6901, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.009953884407877922, "rewards/margins": 0.06971423327922821, "rewards/rejected": -0.05976034328341484, "step": 14660 }, { "epoch": 0.96, "learning_rate": 2.4542198221714218e-08, "logits/chosen": -2.1728930473327637, "logits/rejected": -1.9003547430038452, "logps/chosen": -137.03646850585938, "logps/rejected": -146.15728759765625, "loss": 0.6883, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.010704811662435532, "rewards/margins": 0.08893907815217972, "rewards/rejected": -0.07823427021503448, "step": 14670 }, { "epoch": 0.96, "learning_rate": 2.3750545922101854e-08, "logits/chosen": -2.5925240516662598, "logits/rejected": -2.207447052001953, "logps/chosen": -308.4715270996094, "logps/rejected": -248.4219207763672, "loss": 0.6903, "rewards/accuracies": 0.625, "rewards/chosen": 0.009196789935231209, "rewards/margins": 0.08344466239213943, "rewards/rejected": -0.07424787431955338, "step": 14680 }, { "epoch": 0.96, "learning_rate": 2.2971810952646112e-08, "logits/chosen": -2.3056700229644775, "logits/rejected": -2.2151083946228027, "logps/chosen": -256.753662109375, "logps/rejected": -214.6802215576172, "loss": 0.6908, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.01609097793698311, "rewards/margins": 0.056323904544115067, "rewards/rejected": -0.04023292288184166, "step": 14690 }, { "epoch": 0.96, "learning_rate": 2.2205997375610576e-08, "logits/chosen": -2.1445729732513428, "logits/rejected": -2.0863611698150635, "logps/chosen": -181.83251953125, "logps/rejected": -196.3846435546875, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": 0.03673393279314041, "rewards/margins": 0.09392055124044418, "rewards/rejected": -0.057186610996723175, "step": 14700 }, { "epoch": 0.96, "eval_logits/chosen": -2.324772834777832, "eval_logits/rejected": -2.1362075805664062, "eval_logps/chosen": -230.6251678466797, "eval_logps/rejected": -218.66000366210938, "eval_loss": 0.6897311806678772, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": 0.013797725550830364, "eval_rewards/margins": 0.08427882194519043, "eval_rewards/rejected": -0.07048109173774719, "eval_runtime": 709.9702, "eval_samples_per_second": 2.817, "eval_steps_per_second": 1.409, "step": 14700 }, { "epoch": 0.96, "learning_rate": 2.1453109185853304e-08, "logits/chosen": -2.3570213317871094, "logits/rejected": -2.299567461013794, "logps/chosen": -199.55894470214844, "logps/rejected": -209.33212280273438, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02519531175494194, "rewards/margins": 0.08025936782360077, "rewards/rejected": -0.05506405234336853, "step": 14710 }, { "epoch": 0.96, "learning_rate": 2.0713150310808784e-08, "logits/chosen": -2.121694564819336, "logits/rejected": -2.446938991546631, "logps/chosen": -226.5384979248047, "logps/rejected": -235.46133422851562, "loss": 0.6924, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.010506866499781609, "rewards/margins": 0.031594760715961456, "rewards/rejected": -0.042101629078388214, "step": 14720 }, { "epoch": 0.96, "learning_rate": 1.9986124610464064e-08, "logits/chosen": -2.2158148288726807, "logits/rejected": -1.9099407196044922, "logps/chosen": -285.78350830078125, "logps/rejected": -228.42324829101562, "loss": 0.6882, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.014465957880020142, "rewards/margins": 0.12452026456594467, "rewards/rejected": -0.11005431413650513, "step": 14730 }, { "epoch": 0.96, "learning_rate": 1.927203587734211e-08, "logits/chosen": -2.19920015335083, "logits/rejected": -1.8051159381866455, "logps/chosen": -250.2556915283203, "logps/rejected": -210.61441040039062, "loss": 0.6888, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.027360107749700546, "rewards/margins": 0.08885184675455093, "rewards/rejected": -0.06149173900485039, "step": 14740 }, { "epoch": 0.97, "learning_rate": 1.8570887836479034e-08, "logits/chosen": -2.3139779567718506, "logits/rejected": -2.1160130500793457, "logps/chosen": -195.26492309570312, "logps/rejected": -255.01797485351562, "loss": 0.6899, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.016417790204286575, "rewards/margins": 0.06937674432992935, "rewards/rejected": -0.08579452335834503, "step": 14750 }, { "epoch": 0.97, "learning_rate": 1.7882684145406616e-08, "logits/chosen": -2.3864521980285645, "logits/rejected": -2.3232614994049072, "logps/chosen": -292.4091796875, "logps/rejected": -298.75286865234375, "loss": 0.6878, "rewards/accuracies": 0.625, "rewards/chosen": 0.04011436551809311, "rewards/margins": 0.0823252946138382, "rewards/rejected": -0.04221092164516449, "step": 14760 }, { "epoch": 0.97, "learning_rate": 1.7207428394132865e-08, "logits/chosen": -2.5386404991149902, "logits/rejected": -2.052577018737793, "logps/chosen": -268.1858825683594, "logps/rejected": -225.7917938232422, "loss": 0.6873, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.01594383455812931, "rewards/margins": 0.12293130159378052, "rewards/rejected": -0.10698747634887695, "step": 14770 }, { "epoch": 0.97, "learning_rate": 1.654512410512177e-08, "logits/chosen": -2.2604587078094482, "logits/rejected": -2.0169408321380615, "logps/chosen": -253.12109375, "logps/rejected": -196.11129760742188, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004558461718261242, "rewards/margins": 0.0537867471575737, "rewards/rejected": -0.04922827333211899, "step": 14780 }, { "epoch": 0.97, "learning_rate": 1.5895774733277468e-08, "logits/chosen": -2.3233249187469482, "logits/rejected": -1.9812166690826416, "logps/chosen": -277.13775634765625, "logps/rejected": -236.5921630859375, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.026791805401444435, "rewards/margins": 0.09690927714109421, "rewards/rejected": -0.07011748105287552, "step": 14790 }, { "epoch": 0.97, "learning_rate": 1.5259383665924e-08, "logits/chosen": -2.584911823272705, "logits/rejected": -2.1934475898742676, "logps/chosen": -337.8364562988281, "logps/rejected": -252.3568572998047, "loss": 0.6897, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.042554210871458054, "rewards/margins": 0.08529296517372131, "rewards/rejected": -0.04273875802755356, "step": 14800 }, { "epoch": 0.97, "eval_logits/chosen": -2.324951171875, "eval_logits/rejected": -2.136404037475586, "eval_logps/chosen": -230.61355590820312, "eval_logps/rejected": -218.665283203125, "eval_loss": 0.6897242665290833, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": 0.013913972303271294, "eval_rewards/margins": 0.08444766700267792, "eval_rewards/rejected": -0.07053370773792267, "eval_runtime": 710.025, "eval_samples_per_second": 2.817, "eval_steps_per_second": 1.408, "step": 14800 }, { "epoch": 0.97, "learning_rate": 1.4635954222789461e-08, "logits/chosen": -2.2716784477233887, "logits/rejected": -2.1852469444274902, "logps/chosen": -214.40774536132812, "logps/rejected": -227.9254150390625, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.026998598128557205, "rewards/margins": 0.08409784734249115, "rewards/rejected": -0.05709924176335335, "step": 14810 }, { "epoch": 0.97, "learning_rate": 1.402548965598688e-08, "logits/chosen": -2.210674285888672, "logits/rejected": -2.305459499359131, "logps/chosen": -202.4854736328125, "logps/rejected": -205.67385864257812, "loss": 0.6903, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0214177705347538, "rewards/margins": 0.06404820829629898, "rewards/rejected": -0.04263044521212578, "step": 14820 }, { "epoch": 0.97, "learning_rate": 1.3427993149998375e-08, "logits/chosen": -2.4624884128570557, "logits/rejected": -2.211487293243408, "logps/chosen": -238.8626251220703, "logps/rejected": -194.0426483154297, "loss": 0.6889, "rewards/accuracies": 0.75, "rewards/chosen": 0.03174557164311409, "rewards/margins": 0.10550177097320557, "rewards/rejected": -0.07375619560480118, "step": 14830 }, { "epoch": 0.97, "learning_rate": 1.2843467821658518e-08, "logits/chosen": -2.4166269302368164, "logits/rejected": -2.350491523742676, "logps/chosen": -222.0076141357422, "logps/rejected": -231.8187713623047, "loss": 0.6888, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.04215265437960625, "rewards/margins": 0.1017962247133255, "rewards/rejected": -0.05964357778429985, "step": 14840 }, { "epoch": 0.97, "learning_rate": 1.2271916720137666e-08, "logits/chosen": -2.52500581741333, "logits/rejected": -2.208137035369873, "logps/chosen": -285.6896057128906, "logps/rejected": -245.7957000732422, "loss": 0.6923, "rewards/accuracies": 0.625, "rewards/chosen": 0.0007992383325472474, "rewards/margins": 0.06241076439619064, "rewards/rejected": -0.06161152571439743, "step": 14850 }, { "epoch": 0.97, "learning_rate": 1.171334282692671e-08, "logits/chosen": -2.3669638633728027, "logits/rejected": -2.2914958000183105, "logps/chosen": -280.2817077636719, "logps/rejected": -260.38970947265625, "loss": 0.6904, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.023167315870523453, "rewards/margins": 0.10739920288324356, "rewards/rejected": -0.084231898188591, "step": 14860 }, { "epoch": 0.97, "learning_rate": 1.116774905582041e-08, "logits/chosen": -2.4192698001861572, "logits/rejected": -2.087759017944336, "logps/chosen": -183.83871459960938, "logps/rejected": -177.45944213867188, "loss": 0.6911, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.00979800708591938, "rewards/margins": 0.06323973834514618, "rewards/rejected": -0.05344173312187195, "step": 14870 }, { "epoch": 0.97, "learning_rate": 1.0635138252902966e-08, "logits/chosen": -2.504063129425049, "logits/rejected": -2.2471530437469482, "logps/chosen": -229.2535400390625, "logps/rejected": -215.491455078125, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.011604288592934608, "rewards/margins": 0.08366361260414124, "rewards/rejected": -0.07205932587385178, "step": 14880 }, { "epoch": 0.97, "learning_rate": 1.0115513196533589e-08, "logits/chosen": -2.338160991668701, "logits/rejected": -2.171861410140991, "logps/chosen": -261.0177001953125, "logps/rejected": -249.5078125, "loss": 0.6914, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.015924451872706413, "rewards/margins": 0.05798298865556717, "rewards/rejected": -0.04205853492021561, "step": 14890 }, { "epoch": 0.97, "learning_rate": 9.608876597330952e-09, "logits/chosen": -2.3476691246032715, "logits/rejected": -2.022096633911133, "logps/chosen": -288.65777587890625, "logps/rejected": -285.6450500488281, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.020350560545921326, "rewards/margins": 0.08672511577606201, "rewards/rejected": -0.10707566887140274, "step": 14900 }, { "epoch": 0.97, "eval_logits/chosen": -2.3250153064727783, "eval_logits/rejected": -2.1365039348602295, "eval_logps/chosen": -230.6240997314453, "eval_logps/rejected": -218.6448974609375, "eval_loss": 0.6897345185279846, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": 0.013808542862534523, "eval_rewards/margins": 0.08413854986429214, "eval_rewards/rejected": -0.07033000141382217, "eval_runtime": 709.7106, "eval_samples_per_second": 2.818, "eval_steps_per_second": 1.409, "step": 14900 }, { "epoch": 0.98, "learning_rate": 9.115231098159594e-09, "logits/chosen": -2.4037442207336426, "logits/rejected": -2.292450189590454, "logps/chosen": -251.7987518310547, "logps/rejected": -238.9403076171875, "loss": 0.691, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.015149926766753197, "rewards/margins": 0.074435293674469, "rewards/rejected": -0.05928536504507065, "step": 14910 }, { "epoch": 0.98, "learning_rate": 8.634579274116317e-09, "logits/chosen": -2.260741710662842, "logits/rejected": -2.1925673484802246, "logps/chosen": -188.34481811523438, "logps/rejected": -219.61099243164062, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01714429259300232, "rewards/margins": 0.10656633228063583, "rewards/rejected": -0.08942203223705292, "step": 14920 }, { "epoch": 0.98, "learning_rate": 8.166923632516865e-09, "logits/chosen": -2.439234733581543, "logits/rejected": -2.150946617126465, "logps/chosen": -225.3989715576172, "logps/rejected": -278.0585021972656, "loss": 0.6849, "rewards/accuracies": 0.75, "rewards/chosen": 0.02332211658358574, "rewards/margins": 0.1503240466117859, "rewards/rejected": -0.12700191140174866, "step": 14930 }, { "epoch": 0.98, "learning_rate": 7.712266612881492e-09, "logits/chosen": -2.1845946311950684, "logits/rejected": -2.0539097785949707, "logps/chosen": -181.65396118164062, "logps/rejected": -185.66207885742188, "loss": 0.69, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0358271598815918, "rewards/margins": 0.088839091360569, "rewards/rejected": -0.0530119314789772, "step": 14940 }, { "epoch": 0.98, "learning_rate": 7.270610586924687e-09, "logits/chosen": -2.450291872024536, "logits/rejected": -2.2107715606689453, "logps/chosen": -260.78521728515625, "logps/rejected": -224.33633422851562, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.041800957173109055, "rewards/margins": 0.07631386816501617, "rewards/rejected": -0.03451291099190712, "step": 14950 }, { "epoch": 0.98, "learning_rate": 6.841957858539916e-09, "logits/chosen": -2.2664036750793457, "logits/rejected": -2.1220388412475586, "logps/chosen": -168.59519958496094, "logps/rejected": -185.31361389160156, "loss": 0.6911, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.019396457821130753, "rewards/margins": 0.05867626518011093, "rewards/rejected": -0.07807272672653198, "step": 14960 }, { "epoch": 0.98, "learning_rate": 6.426310663790181e-09, "logits/chosen": -2.1963348388671875, "logits/rejected": -2.1024935245513916, "logps/chosen": -236.64208984375, "logps/rejected": -211.93154907226562, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0033364570699632168, "rewards/margins": 0.06757830083370209, "rewards/rejected": -0.06424184143543243, "step": 14970 }, { "epoch": 0.98, "learning_rate": 6.023671170894696e-09, "logits/chosen": -2.5223042964935303, "logits/rejected": -1.931014060974121, "logps/chosen": -288.4738464355469, "logps/rejected": -213.46670532226562, "loss": 0.6883, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.03854692354798317, "rewards/margins": 0.11772453784942627, "rewards/rejected": -0.0791776180267334, "step": 14980 }, { "epoch": 0.98, "learning_rate": 5.634041480218344e-09, "logits/chosen": -2.4757115840911865, "logits/rejected": -2.137505054473877, "logps/chosen": -250.7669677734375, "logps/rejected": -253.52487182617188, "loss": 0.6897, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.023066259920597076, "rewards/margins": 0.07198301702737808, "rewards/rejected": -0.0489167645573616, "step": 14990 }, { "epoch": 0.98, "learning_rate": 5.257423624260849e-09, "logits/chosen": -2.520918369293213, "logits/rejected": -2.0873606204986572, "logps/chosen": -260.77056884765625, "logps/rejected": -224.59854125976562, "loss": 0.6925, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.025528425350785255, "rewards/margins": 0.07071218639612198, "rewards/rejected": -0.045183759182691574, "step": 15000 }, { "epoch": 0.98, "eval_logits/chosen": -2.325486660003662, "eval_logits/rejected": -2.136941432952881, "eval_logps/chosen": -230.58958435058594, "eval_logps/rejected": -218.62281799316406, "eval_loss": 0.6897296905517578, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": 0.014153635129332542, "eval_rewards/margins": 0.08426273614168167, "eval_rewards/rejected": -0.07010909914970398, "eval_runtime": 709.7214, "eval_samples_per_second": 2.818, "eval_steps_per_second": 1.409, "step": 15000 }, { "epoch": 0.98, "learning_rate": 4.893819567644564e-09, "logits/chosen": -2.2012670040130615, "logits/rejected": -2.1785387992858887, "logps/chosen": -188.42910766601562, "logps/rejected": -204.0313720703125, "loss": 0.6902, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.004323553293943405, "rewards/margins": 0.054262660443782806, "rewards/rejected": -0.05858622118830681, "step": 15010 }, { "epoch": 0.98, "learning_rate": 4.543231207107257e-09, "logits/chosen": -2.2762811183929443, "logits/rejected": -2.059688091278076, "logps/chosen": -257.8048400878906, "logps/rejected": -240.6076202392578, "loss": 0.6926, "rewards/accuracies": 0.625, "rewards/chosen": 0.00016754865646362305, "rewards/margins": 0.0641152560710907, "rewards/rejected": -0.06394769996404648, "step": 15020 }, { "epoch": 0.98, "learning_rate": 4.205660371488785e-09, "logits/chosen": -2.5772321224212646, "logits/rejected": -2.2152597904205322, "logps/chosen": -282.9827575683594, "logps/rejected": -245.66513061523438, "loss": 0.6931, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.011705792509019375, "rewards/margins": 0.04472345858812332, "rewards/rejected": -0.03301766887307167, "step": 15030 }, { "epoch": 0.98, "learning_rate": 3.88110882172471e-09, "logits/chosen": -2.2456133365631104, "logits/rejected": -2.1853480339050293, "logps/chosen": -220.9130859375, "logps/rejected": -221.7021026611328, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0021468736231327057, "rewards/margins": 0.05425567179918289, "rewards/rejected": -0.0564025342464447, "step": 15040 }, { "epoch": 0.98, "learning_rate": 3.569578250834371e-09, "logits/chosen": -2.3457345962524414, "logits/rejected": -2.0774590969085693, "logps/chosen": -298.05279541015625, "logps/rejected": -271.5147705078125, "loss": 0.6879, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.03925010561943054, "rewards/margins": 0.1122470498085022, "rewards/rejected": -0.07299693673849106, "step": 15050 }, { "epoch": 0.99, "learning_rate": 3.2710702839139353e-09, "logits/chosen": -2.373473644256592, "logits/rejected": -2.229529857635498, "logps/chosen": -202.07412719726562, "logps/rejected": -216.16708374023438, "loss": 0.6918, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.025251392275094986, "rewards/margins": 0.0481976643204689, "rewards/rejected": -0.022946273908019066, "step": 15060 }, { "epoch": 0.99, "learning_rate": 2.9855864781272448e-09, "logits/chosen": -2.3376505374908447, "logits/rejected": -2.3747403621673584, "logps/chosen": -209.34140014648438, "logps/rejected": -250.8092498779297, "loss": 0.6905, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.023322973400354385, "rewards/margins": 0.06673813611268997, "rewards/rejected": -0.04341515898704529, "step": 15070 }, { "epoch": 0.99, "learning_rate": 2.7131283226977665e-09, "logits/chosen": -2.3406639099121094, "logits/rejected": -2.4078879356384277, "logps/chosen": -217.74288940429688, "logps/rejected": -240.52072143554688, "loss": 0.6901, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.017053885385394096, "rewards/margins": 0.08927709609270096, "rewards/rejected": -0.07222320139408112, "step": 15080 }, { "epoch": 0.99, "learning_rate": 2.4536972389008205e-09, "logits/chosen": -2.2868685722351074, "logits/rejected": -2.058229446411133, "logps/chosen": -233.2720489501953, "logps/rejected": -207.40798950195312, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.038629334419965744, "rewards/margins": 0.118269182741642, "rewards/rejected": -0.07963985949754715, "step": 15090 }, { "epoch": 0.99, "learning_rate": 2.20729458005553e-09, "logits/chosen": -2.201646089553833, "logits/rejected": -2.040139675140381, "logps/chosen": -191.54600524902344, "logps/rejected": -181.17190551757812, "loss": 0.6882, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.028632348403334618, "rewards/margins": 0.11827573925256729, "rewards/rejected": -0.08964338153600693, "step": 15100 }, { "epoch": 0.99, "eval_logits/chosen": -2.3254594802856445, "eval_logits/rejected": -2.1368985176086426, "eval_logps/chosen": -230.5937042236328, "eval_logps/rejected": -218.6256866455078, "eval_loss": 0.6897294521331787, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": 0.014112350530922413, "eval_rewards/margins": 0.08425014466047287, "eval_rewards/rejected": -0.07013778388500214, "eval_runtime": 710.0809, "eval_samples_per_second": 2.817, "eval_steps_per_second": 1.408, "step": 15100 }, { "epoch": 0.99, "learning_rate": 1.9739216315192712e-09, "logits/chosen": -2.3063950538635254, "logits/rejected": -2.096644163131714, "logps/chosen": -227.7020263671875, "logps/rejected": -210.0640106201172, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.016067935153841972, "rewards/margins": 0.06059398502111435, "rewards/rejected": -0.044526055455207825, "step": 15110 }, { "epoch": 0.99, "learning_rate": 1.7535796106796231e-09, "logits/chosen": -2.3938944339752197, "logits/rejected": -2.072892665863037, "logps/chosen": -276.0992431640625, "logps/rejected": -202.15260314941406, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -0.005628110375255346, "rewards/margins": 0.06384317576885223, "rewards/rejected": -0.06947128474712372, "step": 15120 }, { "epoch": 0.99, "learning_rate": 1.5462696669482636e-09, "logits/chosen": -2.359903335571289, "logits/rejected": -2.2396528720855713, "logps/chosen": -219.18911743164062, "logps/rejected": -233.8794403076172, "loss": 0.6885, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.020530302077531815, "rewards/margins": 0.07046308368444443, "rewards/rejected": -0.04993278905749321, "step": 15130 }, { "epoch": 0.99, "learning_rate": 1.3519928817556927e-09, "logits/chosen": -2.21510648727417, "logits/rejected": -2.1697449684143066, "logps/chosen": -171.1583251953125, "logps/rejected": -178.51443481445312, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.02235172688961029, "rewards/margins": 0.0680917277932167, "rewards/rejected": -0.045739997178316116, "step": 15140 }, { "epoch": 0.99, "learning_rate": 1.1707502685448512e-09, "logits/chosen": -2.445695638656616, "logits/rejected": -2.1078381538391113, "logps/chosen": -222.18508911132812, "logps/rejected": -176.2418975830078, "loss": 0.6863, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0008492677588947117, "rewards/margins": 0.12245283275842667, "rewards/rejected": -0.12160356342792511, "step": 15150 }, { "epoch": 0.99, "learning_rate": 1.002542772765569e-09, "logits/chosen": -2.2757375240325928, "logits/rejected": -1.9920152425765991, "logps/chosen": -187.7380828857422, "logps/rejected": -158.5767059326172, "loss": 0.6893, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.008657276630401611, "rewards/margins": 0.09146241843700409, "rewards/rejected": -0.08280514925718307, "step": 15160 }, { "epoch": 0.99, "learning_rate": 8.473712718709559e-10, "logits/chosen": -2.1393208503723145, "logits/rejected": -2.1515212059020996, "logps/chosen": -193.9955596923828, "logps/rejected": -187.0467987060547, "loss": 0.6926, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.002506362274289131, "rewards/margins": 0.03829359635710716, "rewards/rejected": -0.040799956768751144, "step": 15170 }, { "epoch": 0.99, "learning_rate": 7.052365753112966e-10, "logits/chosen": -2.1680989265441895, "logits/rejected": -1.8749319314956665, "logps/chosen": -234.80142211914062, "logps/rejected": -226.61306762695312, "loss": 0.6895, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0051465281285345554, "rewards/margins": 0.10587289184331894, "rewards/rejected": -0.10072635114192963, "step": 15180 }, { "epoch": 0.99, "learning_rate": 5.761394245307195e-10, "logits/chosen": -2.155925989151001, "logits/rejected": -2.18461275100708, "logps/chosen": -235.2099609375, "logps/rejected": -238.4646759033203, "loss": 0.692, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.018722042441368103, "rewards/margins": 0.04466164484620094, "rewards/rejected": -0.02593959867954254, "step": 15190 }, { "epoch": 0.99, "learning_rate": 4.6008049296358826e-10, "logits/chosen": -2.229979991912842, "logits/rejected": -2.126420736312866, "logps/chosen": -185.88441467285156, "logps/rejected": -165.7515411376953, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": 0.0013815786223858595, "rewards/margins": 0.08110791444778442, "rewards/rejected": -0.07972635328769684, "step": 15200 }, { "epoch": 0.99, "eval_logits/chosen": -2.325113296508789, "eval_logits/rejected": -2.1365654468536377, "eval_logps/chosen": -230.59994506835938, "eval_logps/rejected": -218.62445068359375, "eval_loss": 0.6897304654121399, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": 0.014050180092453957, "eval_rewards/margins": 0.08417567610740662, "eval_rewards/rejected": -0.07012549042701721, "eval_runtime": 709.6375, "eval_samples_per_second": 2.818, "eval_steps_per_second": 1.409, "step": 15200 }, { "epoch": 1.0, "learning_rate": 3.5706038603006146e-10, "logits/chosen": -2.4180667400360107, "logits/rejected": -2.3709511756896973, "logps/chosen": -284.6794128417969, "logps/rejected": -279.5739440917969, "loss": 0.6909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04930936545133591, "rewards/margins": 0.09137637913227081, "rewards/rejected": -0.04206700250506401, "step": 15210 }, { "epoch": 1.0, "learning_rate": 2.670796411333165e-10, "logits/chosen": -2.588311195373535, "logits/rejected": -2.297461986541748, "logps/chosen": -219.0515594482422, "logps/rejected": -215.50772094726562, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.00759897381067276, "rewards/margins": 0.08762288093566895, "rewards/rejected": -0.08002390712499619, "step": 15220 }, { "epoch": 1.0, "learning_rate": 1.9013872765677455e-10, "logits/chosen": -2.3167214393615723, "logits/rejected": -2.108008861541748, "logps/chosen": -216.2996368408203, "logps/rejected": -207.8689422607422, "loss": 0.6923, "rewards/accuracies": 0.625, "rewards/chosen": 0.03213484585285187, "rewards/margins": 0.04103900119662285, "rewards/rejected": -0.008904160931706429, "step": 15230 }, { "epoch": 1.0, "learning_rate": 1.262380469624347e-10, "logits/chosen": -2.2934911251068115, "logits/rejected": -2.121281862258911, "logps/chosen": -202.36239624023438, "logps/rejected": -185.4467010498047, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01644532009959221, "rewards/margins": 0.05994013696908951, "rewards/rejected": -0.0434948205947876, "step": 15240 }, { "epoch": 1.0, "learning_rate": 7.53779323872661e-11, "logits/chosen": -2.1865756511688232, "logits/rejected": -2.289961576461792, "logps/chosen": -190.6034698486328, "logps/rejected": -205.8804931640625, "loss": 0.6886, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.012599390931427479, "rewards/margins": 0.09233128279447556, "rewards/rejected": -0.07973189651966095, "step": 15250 }, { "epoch": 1.0, "learning_rate": 3.7558649242652734e-11, "logits/chosen": -2.4936611652374268, "logits/rejected": -2.2574238777160645, "logps/chosen": -395.0047912597656, "logps/rejected": -322.8507995605469, "loss": 0.6908, "rewards/accuracies": 0.75, "rewards/chosen": 0.016790423542261124, "rewards/margins": 0.07416915148496628, "rewards/rejected": -0.05737873911857605, "step": 15260 }, { "epoch": 1.0, "learning_rate": 1.2780394812450526e-11, "logits/chosen": -2.1327195167541504, "logits/rejected": -2.085716724395752, "logps/chosen": -233.3424072265625, "logps/rejected": -242.1055145263672, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": -0.011986208148300648, "rewards/margins": 0.08845750242471695, "rewards/rejected": -0.10044372081756592, "step": 15270 }, { "epoch": 1.0, "learning_rate": 1.0432983521546646e-12, "logits/chosen": -2.170581817626953, "logits/rejected": -2.0781712532043457, "logps/chosen": -182.6875, "logps/rejected": -211.4102325439453, "loss": 0.6886, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.007787027861922979, "rewards/margins": 0.09877107292413712, "rewards/rejected": -0.0909840390086174, "step": 15280 }, { "epoch": 1.0, "step": 15284, "total_flos": 0.0, "train_loss": 0.6900739747015976, "train_runtime": 171639.7836, "train_samples_per_second": 0.356, "train_steps_per_second": 0.089 } ], "logging_steps": 10, "max_steps": 15284, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }