{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9983539094650205, "eval_steps": 25, "global_step": 1214, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.098360655737705e-09, "logits/generated": -2.6401095390319824, "logits/real": -2.652092456817627, "logps/generated": -522.5341796875, "logps/real": -420.15106201171875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.0983606557377046e-08, "logits/generated": -2.7537119388580322, "logits/real": -2.749621868133545, "logps/generated": -494.7628479003906, "logps/real": -410.4559326171875, "loss": 0.6882, "rewards/accuracies": 0.4513888955116272, "rewards/generated": -0.010888932272791862, "rewards/margins": -0.00011028432345483452, "rewards/real": -0.010999216698110104, "step": 10 }, { "epoch": 0.03, "learning_rate": 8.196721311475409e-08, "logits/generated": -2.72717022895813, "logits/real": -2.736893892288208, "logps/generated": -487.6122131347656, "logps/real": -399.52362060546875, "loss": 0.591, "rewards/accuracies": 0.731249988079071, "rewards/generated": -0.3106920123100281, "rewards/margins": 0.2161286622285843, "rewards/real": -0.09456336498260498, "step": 20 }, { "epoch": 0.04, "eval_logits/generated": -2.698356866836548, "eval_logits/real": -2.709562301635742, "eval_logps/generated": -465.1226806640625, "eval_logps/real": -419.8426208496094, "eval_loss": 0.4210089147090912, "eval_rewards/accuracies": 0.8500000238418579, "eval_rewards/generated": -1.0787537097930908, "eval_rewards/margins": 0.8287025094032288, "eval_rewards/real": -0.25005120038986206, "eval_runtime": 549.2945, "eval_samples_per_second": 7.861, "eval_steps_per_second": 0.246, "step": 25 }, { "epoch": 0.05, "learning_rate": 1.2295081967213113e-07, "logits/generated": -2.7093679904937744, "logits/real": -2.718613624572754, "logps/generated": -482.2759704589844, "logps/real": -419.0752868652344, "loss": 0.4538, "rewards/accuracies": 0.862500011920929, "rewards/generated": -0.9867717623710632, "rewards/margins": 0.7356894016265869, "rewards/real": -0.2510823607444763, "step": 30 }, { "epoch": 0.07, "learning_rate": 1.6393442622950818e-07, "logits/generated": -2.679270029067993, "logits/real": -2.691554307937622, "logps/generated": -500.12786865234375, "logps/real": -385.42303466796875, "loss": 0.3118, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -1.5584895610809326, "rewards/margins": 1.3963291645050049, "rewards/real": -0.16216044127941132, "step": 40 }, { "epoch": 0.08, "learning_rate": 2.0491803278688524e-07, "logits/generated": -2.6619114875793457, "logits/real": -2.6760687828063965, "logps/generated": -513.6600952148438, "logps/real": -445.169677734375, "loss": 0.2223, "rewards/accuracies": 0.9375, "rewards/generated": -2.419264316558838, "rewards/margins": 2.2213003635406494, "rewards/real": -0.19796383380889893, "step": 50 }, { "epoch": 0.08, "eval_logits/generated": -2.630603551864624, "eval_logits/real": -2.6445798873901367, "eval_logps/generated": -485.2113342285156, "eval_logps/real": -423.0011291503906, "eval_loss": 0.2172713428735733, "eval_rewards/accuracies": 0.9175925850868225, "eval_rewards/generated": -3.0876214504241943, "eval_rewards/margins": 2.5217204093933105, "eval_rewards/real": -0.5659011602401733, "eval_runtime": 547.8486, "eval_samples_per_second": 7.882, "eval_steps_per_second": 0.246, "step": 50 }, { "epoch": 0.1, "learning_rate": 2.4590163934426226e-07, "logits/generated": -2.6456358432769775, "logits/real": -2.656313419342041, "logps/generated": -483.8861389160156, "logps/real": -392.04681396484375, "loss": 0.1852, "rewards/accuracies": 0.925000011920929, "rewards/generated": -3.4535624980926514, "rewards/margins": 2.996718168258667, "rewards/real": -0.45684438943862915, "step": 60 }, { "epoch": 0.12, "learning_rate": 2.868852459016393e-07, "logits/generated": -2.593245029449463, "logits/real": -2.633634328842163, "logps/generated": -518.401611328125, "logps/real": -430.7620544433594, "loss": 0.168, "rewards/accuracies": 0.949999988079071, "rewards/generated": -3.725245237350464, "rewards/margins": 3.4510974884033203, "rewards/real": -0.27414828538894653, "step": 70 }, { "epoch": 0.12, "eval_logits/generated": -2.583158254623413, "eval_logits/real": -2.600541353225708, "eval_logps/generated": -499.10601806640625, "eval_logps/real": -424.4022216796875, "eval_loss": 0.15319335460662842, "eval_rewards/accuracies": 0.9435185194015503, "eval_rewards/generated": -4.4770894050598145, "eval_rewards/margins": 3.7710745334625244, "eval_rewards/real": -0.7060146927833557, "eval_runtime": 550.1138, "eval_samples_per_second": 7.849, "eval_steps_per_second": 0.245, "step": 75 }, { "epoch": 0.13, "learning_rate": 3.2786885245901637e-07, "logits/generated": -2.5965609550476074, "logits/real": -2.6199886798858643, "logps/generated": -503.3633728027344, "logps/real": -432.483642578125, "loss": 0.1364, "rewards/accuracies": 0.949999988079071, "rewards/generated": -4.042363166809082, "rewards/margins": 3.511378765106201, "rewards/real": -0.5309839248657227, "step": 80 }, { "epoch": 0.15, "learning_rate": 3.6885245901639347e-07, "logits/generated": -2.568969249725342, "logits/real": -2.5813772678375244, "logps/generated": -529.686767578125, "logps/real": -427.0670471191406, "loss": 0.1207, "rewards/accuracies": 0.956250011920929, "rewards/generated": -5.874642848968506, "rewards/margins": 5.00606632232666, "rewards/real": -0.8685771822929382, "step": 90 }, { "epoch": 0.16, "learning_rate": 4.0983606557377047e-07, "logits/generated": -2.590125560760498, "logits/real": -2.594589948654175, "logps/generated": -532.7440185546875, "logps/real": -431.35028076171875, "loss": 0.1126, "rewards/accuracies": 0.956250011920929, "rewards/generated": -5.600076198577881, "rewards/margins": 4.819561958312988, "rewards/real": -0.7805139422416687, "step": 100 }, { "epoch": 0.16, "eval_logits/generated": -2.596092939376831, "eval_logits/real": -2.611849784851074, "eval_logps/generated": -517.4968872070312, "eval_logps/real": -430.088623046875, "eval_loss": 0.12175341695547104, "eval_rewards/accuracies": 0.9509259462356567, "eval_rewards/generated": -6.31616735458374, "eval_rewards/margins": 5.041518211364746, "eval_rewards/real": -1.2746495008468628, "eval_runtime": 548.1161, "eval_samples_per_second": 7.878, "eval_steps_per_second": 0.246, "step": 100 }, { "epoch": 0.18, "learning_rate": 4.508196721311475e-07, "logits/generated": -2.637228012084961, "logits/real": -2.649094343185425, "logps/generated": -518.5374145507812, "logps/real": -405.7285461425781, "loss": 0.1257, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -6.386349678039551, "rewards/margins": 5.354151725769043, "rewards/real": -1.0321977138519287, "step": 110 }, { "epoch": 0.2, "learning_rate": 4.918032786885245e-07, "logits/generated": -2.613947868347168, "logits/real": -2.6607353687286377, "logps/generated": -521.7745361328125, "logps/real": -417.6166076660156, "loss": 0.0854, "rewards/accuracies": 0.981249988079071, "rewards/generated": -7.714804649353027, "rewards/margins": 6.425919532775879, "rewards/real": -1.2888853549957275, "step": 120 }, { "epoch": 0.21, "eval_logits/generated": -2.553359270095825, "eval_logits/real": -2.585937023162842, "eval_logps/generated": -544.7130126953125, "eval_logps/real": -435.28656005859375, "eval_loss": 0.09206286817789078, "eval_rewards/accuracies": 0.9611111283302307, "eval_rewards/generated": -9.037795066833496, "eval_rewards/margins": 7.243347644805908, "eval_rewards/real": -1.7944461107254028, "eval_runtime": 549.7281, "eval_samples_per_second": 7.855, "eval_steps_per_second": 0.246, "step": 125 }, { "epoch": 0.21, "learning_rate": 4.963369963369964e-07, "logits/generated": -2.5732762813568115, "logits/real": -2.6250641345977783, "logps/generated": -594.252197265625, "logps/real": -480.178955078125, "loss": 0.1156, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -8.990375518798828, "rewards/margins": 7.42321252822876, "rewards/real": -1.5671632289886475, "step": 130 }, { "epoch": 0.23, "learning_rate": 4.917582417582417e-07, "logits/generated": -2.588108777999878, "logits/real": -2.616245746612549, "logps/generated": -536.1387939453125, "logps/real": -419.4244079589844, "loss": 0.0895, "rewards/accuracies": 0.949999988079071, "rewards/generated": -8.070068359375, "rewards/margins": 6.980319976806641, "rewards/real": -1.089747667312622, "step": 140 }, { "epoch": 0.25, "learning_rate": 4.871794871794871e-07, "logits/generated": -2.6047449111938477, "logits/real": -2.6315391063690186, "logps/generated": -543.27978515625, "logps/real": -417.0159606933594, "loss": 0.0609, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -8.380901336669922, "rewards/margins": 7.630522727966309, "rewards/real": -0.7503789067268372, "step": 150 }, { "epoch": 0.25, "eval_logits/generated": -2.587456703186035, "eval_logits/real": -2.6238744258880615, "eval_logps/generated": -546.2610473632812, "eval_logps/real": -434.2024841308594, "eval_loss": 0.07384903728961945, "eval_rewards/accuracies": 0.9638888835906982, "eval_rewards/generated": -9.192586898803711, "eval_rewards/margins": 7.506547451019287, "eval_rewards/real": -1.686038851737976, "eval_runtime": 548.206, "eval_samples_per_second": 7.877, "eval_steps_per_second": 0.246, "step": 150 }, { "epoch": 0.26, "learning_rate": 4.826007326007326e-07, "logits/generated": -2.591831684112549, "logits/real": -2.607217788696289, "logps/generated": -571.234619140625, "logps/real": -392.61004638671875, "loss": 0.0855, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -9.883105278015137, "rewards/margins": 7.8926191329956055, "rewards/real": -1.9904861450195312, "step": 160 }, { "epoch": 0.28, "learning_rate": 4.78021978021978e-07, "logits/generated": -2.554612874984741, "logits/real": -2.5981972217559814, "logps/generated": -613.7628173828125, "logps/real": -422.0660095214844, "loss": 0.0654, "rewards/accuracies": 0.981249988079071, "rewards/generated": -11.727653503417969, "rewards/margins": 9.922538757324219, "rewards/real": -1.8051135540008545, "step": 170 }, { "epoch": 0.29, "eval_logits/generated": -2.5252153873443604, "eval_logits/real": -2.5698459148406982, "eval_logps/generated": -552.5237426757812, "eval_logps/real": -437.7025451660156, "eval_loss": 0.07325886934995651, "eval_rewards/accuracies": 0.9648148417472839, "eval_rewards/generated": -9.818856239318848, "eval_rewards/margins": 7.782812595367432, "eval_rewards/real": -2.036044120788574, "eval_runtime": 538.2331, "eval_samples_per_second": 8.023, "eval_steps_per_second": 0.251, "step": 175 }, { "epoch": 0.3, "learning_rate": 4.734432234432234e-07, "logits/generated": -2.5366225242614746, "logits/real": -2.5802321434020996, "logps/generated": -562.9288940429688, "logps/real": -426.1321716308594, "loss": 0.065, "rewards/accuracies": 0.987500011920929, "rewards/generated": -10.882379531860352, "rewards/margins": 8.783550262451172, "rewards/real": -2.098829984664917, "step": 180 }, { "epoch": 0.31, "learning_rate": 4.6886446886446884e-07, "logits/generated": -2.464695930480957, "logits/real": -2.514756917953491, "logps/generated": -585.3269653320312, "logps/real": -439.30535888671875, "loss": 0.0803, "rewards/accuracies": 0.96875, "rewards/generated": -13.198904037475586, "rewards/margins": 10.291043281555176, "rewards/real": -2.9078612327575684, "step": 190 }, { "epoch": 0.33, "learning_rate": 4.6428571428571427e-07, "logits/generated": -2.4625725746154785, "logits/real": -2.5219738483428955, "logps/generated": -573.2178955078125, "logps/real": -413.1720275878906, "loss": 0.0814, "rewards/accuracies": 0.981249988079071, "rewards/generated": -12.102069854736328, "rewards/margins": 9.350552558898926, "rewards/real": -2.751516342163086, "step": 200 }, { "epoch": 0.33, "eval_logits/generated": -2.4633901119232178, "eval_logits/real": -2.5260355472564697, "eval_logps/generated": -556.628662109375, "eval_logps/real": -440.68316650390625, "eval_loss": 0.07137465476989746, "eval_rewards/accuracies": 0.9629629850387573, "eval_rewards/generated": -10.229352951049805, "eval_rewards/margins": 7.8952484130859375, "eval_rewards/real": -2.334104299545288, "eval_runtime": 541.6446, "eval_samples_per_second": 7.972, "eval_steps_per_second": 0.249, "step": 200 }, { "epoch": 0.35, "learning_rate": 4.5970695970695965e-07, "logits/generated": -2.5092012882232666, "logits/real": -2.543381929397583, "logps/generated": -568.660888671875, "logps/real": -418.6875, "loss": 0.0591, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.36689281463623, "rewards/margins": 8.45336627960205, "rewards/real": -1.913527488708496, "step": 210 }, { "epoch": 0.36, "learning_rate": 4.551282051282051e-07, "logits/generated": -2.365678310394287, "logits/real": -2.4535679817199707, "logps/generated": -604.965087890625, "logps/real": -432.04974365234375, "loss": 0.0356, "rewards/accuracies": 0.981249988079071, "rewards/generated": -14.320533752441406, "rewards/margins": 10.759347915649414, "rewards/real": -3.5611863136291504, "step": 220 }, { "epoch": 0.37, "eval_logits/generated": -2.4310951232910156, "eval_logits/real": -2.5141642093658447, "eval_logps/generated": -568.4989624023438, "eval_logps/real": -444.03936767578125, "eval_loss": 0.06977172195911407, "eval_rewards/accuracies": 0.9666666388511658, "eval_rewards/generated": -11.4163818359375, "eval_rewards/margins": 8.746658325195312, "eval_rewards/real": -2.6697258949279785, "eval_runtime": 540.5003, "eval_samples_per_second": 7.989, "eval_steps_per_second": 0.25, "step": 225 }, { "epoch": 0.38, "learning_rate": 4.5054945054945056e-07, "logits/generated": -2.40238618850708, "logits/real": -2.490166425704956, "logps/generated": -583.9486083984375, "logps/real": -433.9161682128906, "loss": 0.0966, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.47558879852295, "rewards/margins": 8.76927661895752, "rewards/real": -2.7063136100769043, "step": 230 }, { "epoch": 0.4, "learning_rate": 4.45970695970696e-07, "logits/generated": -2.3658084869384766, "logits/real": -2.4398703575134277, "logps/generated": -579.8946533203125, "logps/real": -403.6769104003906, "loss": 0.0714, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.590639114379883, "rewards/margins": 9.855241775512695, "rewards/real": -2.735395908355713, "step": 240 }, { "epoch": 0.41, "learning_rate": 4.4139194139194137e-07, "logits/generated": -2.3249025344848633, "logits/real": -2.421391010284424, "logps/generated": -627.2833251953125, "logps/real": -436.53399658203125, "loss": 0.0641, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.549036979675293, "rewards/margins": 11.038546562194824, "rewards/real": -2.510490894317627, "step": 250 }, { "epoch": 0.41, "eval_logits/generated": -2.310610055923462, "eval_logits/real": -2.4202382564544678, "eval_logps/generated": -577.3876953125, "eval_logps/real": -441.26837158203125, "eval_loss": 0.0585518442094326, "eval_rewards/accuracies": 0.9694444537162781, "eval_rewards/generated": -12.305254936218262, "eval_rewards/margins": 9.912630081176758, "eval_rewards/real": -2.3926241397857666, "eval_runtime": 544.2791, "eval_samples_per_second": 7.933, "eval_steps_per_second": 0.248, "step": 250 }, { "epoch": 0.43, "learning_rate": 4.368131868131868e-07, "logits/generated": -2.352004289627075, "logits/real": -2.446444034576416, "logps/generated": -596.7254638671875, "logps/real": -417.0332946777344, "loss": 0.0495, "rewards/accuracies": 0.981249988079071, "rewards/generated": -12.558148384094238, "rewards/margins": 10.505435943603516, "rewards/real": -2.0527119636535645, "step": 260 }, { "epoch": 0.44, "learning_rate": 4.3223443223443223e-07, "logits/generated": -2.3381831645965576, "logits/real": -2.430725574493408, "logps/generated": -658.2124633789062, "logps/real": -449.29461669921875, "loss": 0.0442, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.05294418334961, "rewards/margins": 13.15046215057373, "rewards/real": -3.9024810791015625, "step": 270 }, { "epoch": 0.45, "eval_logits/generated": -2.3879544734954834, "eval_logits/real": -2.4772706031799316, "eval_logps/generated": -573.7975463867188, "eval_logps/real": -442.5116882324219, "eval_loss": 0.06716620177030563, "eval_rewards/accuracies": 0.9675925970077515, "eval_rewards/generated": -11.946240425109863, "eval_rewards/margins": 9.429278373718262, "eval_rewards/real": -2.516960382461548, "eval_runtime": 543.8707, "eval_samples_per_second": 7.939, "eval_steps_per_second": 0.248, "step": 275 }, { "epoch": 0.46, "learning_rate": 4.276556776556776e-07, "logits/generated": -2.2920713424682617, "logits/real": -2.4120707511901855, "logps/generated": -625.0443115234375, "logps/real": -443.11773681640625, "loss": 0.0455, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.292150497436523, "rewards/margins": 12.260358810424805, "rewards/real": -3.0317909717559814, "step": 280 }, { "epoch": 0.48, "learning_rate": 4.2307692307692304e-07, "logits/generated": -2.2416317462921143, "logits/real": -2.3849129676818848, "logps/generated": -620.5736083984375, "logps/real": -438.10302734375, "loss": 0.0638, "rewards/accuracies": 0.96875, "rewards/generated": -15.206059455871582, "rewards/margins": 11.891336441040039, "rewards/real": -3.314722776412964, "step": 290 }, { "epoch": 0.49, "learning_rate": 4.1849816849816847e-07, "logits/generated": -2.2657132148742676, "logits/real": -2.416287660598755, "logps/generated": -596.555908203125, "logps/real": -443.7845764160156, "loss": 0.0707, "rewards/accuracies": 0.96875, "rewards/generated": -13.721325874328613, "rewards/margins": 11.418547630310059, "rewards/real": -2.30277681350708, "step": 300 }, { "epoch": 0.49, "eval_logits/generated": -2.256381034851074, "eval_logits/real": -2.391341209411621, "eval_logps/generated": -605.804443359375, "eval_logps/real": -455.82989501953125, "eval_loss": 0.05395643413066864, "eval_rewards/accuracies": 0.9666666388511658, "eval_rewards/generated": -15.146928787231445, "eval_rewards/margins": 11.298150062561035, "eval_rewards/real": -3.8487789630889893, "eval_runtime": 543.6439, "eval_samples_per_second": 7.943, "eval_steps_per_second": 0.248, "step": 300 }, { "epoch": 0.51, "learning_rate": 4.1391941391941385e-07, "logits/generated": -2.2252917289733887, "logits/real": -2.3379065990448, "logps/generated": -627.6759033203125, "logps/real": -425.13201904296875, "loss": 0.0306, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -16.1324462890625, "rewards/margins": 12.730301856994629, "rewards/real": -3.40214467048645, "step": 310 }, { "epoch": 0.53, "learning_rate": 4.0934065934065933e-07, "logits/generated": -2.301344633102417, "logits/real": -2.4362711906433105, "logps/generated": -602.3247680664062, "logps/real": -444.23974609375, "loss": 0.0683, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.596293449401855, "rewards/margins": 10.262487411499023, "rewards/real": -3.333805799484253, "step": 320 }, { "epoch": 0.53, "eval_logits/generated": -2.1402149200439453, "eval_logits/real": -2.322171926498413, "eval_logps/generated": -636.7122802734375, "eval_logps/real": -470.3190002441406, "eval_loss": 0.057357266545295715, "eval_rewards/accuracies": 0.9666666388511658, "eval_rewards/generated": -18.237712860107422, "eval_rewards/margins": 12.940022468566895, "eval_rewards/real": -5.297689437866211, "eval_runtime": 537.4705, "eval_samples_per_second": 8.034, "eval_steps_per_second": 0.251, "step": 325 }, { "epoch": 0.54, "learning_rate": 4.0476190476190476e-07, "logits/generated": -2.1620073318481445, "logits/real": -2.3063290119171143, "logps/generated": -652.4406127929688, "logps/real": -435.88494873046875, "loss": 0.0379, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.206768035888672, "rewards/margins": 13.691610336303711, "rewards/real": -4.51515531539917, "step": 330 }, { "epoch": 0.56, "learning_rate": 4.001831501831502e-07, "logits/generated": -2.236128330230713, "logits/real": -2.3870835304260254, "logps/generated": -652.0904541015625, "logps/real": -450.24859619140625, "loss": 0.0553, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.906084060668945, "rewards/margins": 14.117843627929688, "rewards/real": -3.7882392406463623, "step": 340 }, { "epoch": 0.58, "learning_rate": 3.9560439560439557e-07, "logits/generated": -2.176945209503174, "logits/real": -2.368508815765381, "logps/generated": -651.4620361328125, "logps/real": -432.74658203125, "loss": 0.0339, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.662532806396484, "rewards/margins": 14.847686767578125, "rewards/real": -3.8148434162139893, "step": 350 }, { "epoch": 0.58, "eval_logits/generated": -2.1701033115386963, "eval_logits/real": -2.3731424808502197, "eval_logps/generated": -627.2608032226562, "eval_logps/real": -454.8285827636719, "eval_loss": 0.04949037358164787, "eval_rewards/accuracies": 0.9731481671333313, "eval_rewards/generated": -17.292573928833008, "eval_rewards/margins": 13.543929100036621, "eval_rewards/real": -3.748645305633545, "eval_runtime": 537.9029, "eval_samples_per_second": 8.027, "eval_steps_per_second": 0.251, "step": 350 }, { "epoch": 0.59, "learning_rate": 3.91025641025641e-07, "logits/generated": -2.2481982707977295, "logits/real": -2.3865692615509033, "logps/generated": -664.8062744140625, "logps/real": -430.34857177734375, "loss": 0.0464, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.22327423095703, "rewards/margins": 13.856378555297852, "rewards/real": -3.366894245147705, "step": 360 }, { "epoch": 0.61, "learning_rate": 3.8644688644688643e-07, "logits/generated": -2.1518349647521973, "logits/real": -2.3465476036071777, "logps/generated": -613.2617797851562, "logps/real": -446.73834228515625, "loss": 0.0648, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.795753479003906, "rewards/margins": 14.179173469543457, "rewards/real": -4.616580009460449, "step": 370 }, { "epoch": 0.62, "eval_logits/generated": -2.316718339920044, "eval_logits/real": -2.478287935256958, "eval_logps/generated": -586.93896484375, "eval_logps/real": -441.6444091796875, "eval_loss": 0.053722720593214035, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/generated": -13.260376930236816, "eval_rewards/margins": 10.830145835876465, "eval_rewards/real": -2.4302310943603516, "eval_runtime": 539.3726, "eval_samples_per_second": 8.006, "eval_steps_per_second": 0.25, "step": 375 }, { "epoch": 0.63, "learning_rate": 3.818681318681318e-07, "logits/generated": -2.279561758041382, "logits/real": -2.478255033493042, "logps/generated": -601.7288208007812, "logps/real": -449.01702880859375, "loss": 0.0427, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.500157356262207, "rewards/margins": 11.545099258422852, "rewards/real": -2.9550578594207764, "step": 380 }, { "epoch": 0.64, "learning_rate": 3.7728937728937724e-07, "logits/generated": -2.285148859024048, "logits/real": -2.452268123626709, "logps/generated": -673.3703002929688, "logps/real": -449.6754455566406, "loss": 0.0272, "rewards/accuracies": 0.981249988079071, "rewards/generated": -17.92129898071289, "rewards/margins": 14.363734245300293, "rewards/real": -3.5575671195983887, "step": 390 }, { "epoch": 0.66, "learning_rate": 3.727106227106227e-07, "logits/generated": -2.158463954925537, "logits/real": -2.3927464485168457, "logps/generated": -665.3966064453125, "logps/real": -455.4427185058594, "loss": 0.0358, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -20.033100128173828, "rewards/margins": 16.113994598388672, "rewards/real": -3.9191043376922607, "step": 400 }, { "epoch": 0.66, "eval_logits/generated": -2.1734633445739746, "eval_logits/real": -2.3874428272247314, "eval_logps/generated": -627.7240600585938, "eval_logps/real": -455.8509216308594, "eval_loss": 0.04597338289022446, "eval_rewards/accuracies": 0.9740740656852722, "eval_rewards/generated": -17.338891983032227, "eval_rewards/margins": 13.488015174865723, "eval_rewards/real": -3.8508799076080322, "eval_runtime": 545.3223, "eval_samples_per_second": 7.918, "eval_steps_per_second": 0.248, "step": 400 }, { "epoch": 0.67, "learning_rate": 3.6813186813186816e-07, "logits/generated": -2.1853232383728027, "logits/real": -2.3945891857147217, "logps/generated": -642.46044921875, "logps/real": -399.55694580078125, "loss": 0.0221, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -18.870121002197266, "rewards/margins": 16.129247665405273, "rewards/real": -2.7408719062805176, "step": 410 }, { "epoch": 0.69, "learning_rate": 3.6355311355311353e-07, "logits/generated": -2.1754229068756104, "logits/real": -2.381279468536377, "logps/generated": -595.3236083984375, "logps/real": -405.1600341796875, "loss": 0.0532, "rewards/accuracies": 0.981249988079071, "rewards/generated": -14.932568550109863, "rewards/margins": 12.319165229797363, "rewards/real": -2.6134040355682373, "step": 420 }, { "epoch": 0.7, "eval_logits/generated": -2.1550426483154297, "eval_logits/real": -2.3750507831573486, "eval_logps/generated": -636.3655395507812, "eval_logps/real": -460.6029052734375, "eval_loss": 0.048347219824790955, "eval_rewards/accuracies": 0.9740740656852722, "eval_rewards/generated": -18.20302963256836, "eval_rewards/margins": 13.876949310302734, "eval_rewards/real": -4.32607889175415, "eval_runtime": 546.9938, "eval_samples_per_second": 7.894, "eval_steps_per_second": 0.247, "step": 425 }, { "epoch": 0.71, "learning_rate": 3.5897435897435896e-07, "logits/generated": -2.183345317840576, "logits/real": -2.4247512817382812, "logps/generated": -591.2523193359375, "logps/real": -400.4705810546875, "loss": 0.043, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -16.82411766052246, "rewards/margins": 14.277923583984375, "rewards/real": -2.5461928844451904, "step": 430 }, { "epoch": 0.72, "learning_rate": 3.543956043956044e-07, "logits/generated": -2.2292613983154297, "logits/real": -2.4110050201416016, "logps/generated": -626.2135620117188, "logps/real": -403.21368408203125, "loss": 0.0332, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.94637680053711, "rewards/margins": 15.042144775390625, "rewards/real": -2.9042327404022217, "step": 440 }, { "epoch": 0.74, "learning_rate": 3.4981684981684977e-07, "logits/generated": -2.2957308292388916, "logits/real": -2.483898639678955, "logps/generated": -624.1409301757812, "logps/real": -416.9951171875, "loss": 0.0408, "rewards/accuracies": 0.981249988079071, "rewards/generated": -17.344409942626953, "rewards/margins": 14.197443962097168, "rewards/real": -3.1469688415527344, "step": 450 }, { "epoch": 0.74, "eval_logits/generated": -2.29823899269104, "eval_logits/real": -2.481050968170166, "eval_logps/generated": -651.6072998046875, "eval_logps/real": -466.22760009765625, "eval_loss": 0.05667389929294586, "eval_rewards/accuracies": 0.9740740656852722, "eval_rewards/generated": -19.727210998535156, "eval_rewards/margins": 14.838663101196289, "eval_rewards/real": -4.888548851013184, "eval_runtime": 546.6533, "eval_samples_per_second": 7.899, "eval_steps_per_second": 0.247, "step": 450 }, { "epoch": 0.76, "learning_rate": 3.452380952380952e-07, "logits/generated": -2.3011131286621094, "logits/real": -2.4612629413604736, "logps/generated": -647.5547485351562, "logps/real": -426.67205810546875, "loss": 0.0449, "rewards/accuracies": 0.96875, "rewards/generated": -17.85089111328125, "rewards/margins": 14.307988166809082, "rewards/real": -3.5429024696350098, "step": 460 }, { "epoch": 0.77, "learning_rate": 3.4065934065934063e-07, "logits/generated": -2.1878461837768555, "logits/real": -2.4027976989746094, "logps/generated": -651.8814697265625, "logps/real": -432.30828857421875, "loss": 0.0434, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -18.791553497314453, "rewards/margins": 14.97038459777832, "rewards/real": -3.8211684226989746, "step": 470 }, { "epoch": 0.78, "eval_logits/generated": -2.1936686038970947, "eval_logits/real": -2.424220323562622, "eval_logps/generated": -615.4547729492188, "eval_logps/real": -446.01873779296875, "eval_loss": 0.04669804871082306, "eval_rewards/accuracies": 0.9731481671333313, "eval_rewards/generated": -16.111957550048828, "eval_rewards/margins": 13.244292259216309, "eval_rewards/real": -2.8676631450653076, "eval_runtime": 548.2901, "eval_samples_per_second": 7.875, "eval_steps_per_second": 0.246, "step": 475 }, { "epoch": 0.79, "learning_rate": 3.360805860805861e-07, "logits/generated": -2.170032501220703, "logits/real": -2.3914527893066406, "logps/generated": -638.609619140625, "logps/real": -419.04473876953125, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/generated": -17.431259155273438, "rewards/margins": 14.863021850585938, "rewards/real": -2.568234920501709, "step": 480 }, { "epoch": 0.81, "learning_rate": 3.315018315018315e-07, "logits/generated": -2.125486135482788, "logits/real": -2.35538911819458, "logps/generated": -608.1831665039062, "logps/real": -426.45733642578125, "loss": 0.0437, "rewards/accuracies": 0.96875, "rewards/generated": -16.634971618652344, "rewards/margins": 13.581262588500977, "rewards/real": -3.053709030151367, "step": 490 }, { "epoch": 0.82, "learning_rate": 3.269230769230769e-07, "logits/generated": -2.0204501152038574, "logits/real": -2.329942226409912, "logps/generated": -684.5260620117188, "logps/real": -425.2499084472656, "loss": 0.0194, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -20.701831817626953, "rewards/margins": 17.69593620300293, "rewards/real": -3.0058956146240234, "step": 500 }, { "epoch": 0.82, "eval_logits/generated": -2.0107169151306152, "eval_logits/real": -2.3290646076202393, "eval_logps/generated": -639.0421752929688, "eval_logps/real": -449.8150939941406, "eval_loss": 0.04547751694917679, "eval_rewards/accuracies": 0.9768518805503845, "eval_rewards/generated": -18.4707088470459, "eval_rewards/margins": 15.223410606384277, "eval_rewards/real": -3.2472991943359375, "eval_runtime": 547.436, "eval_samples_per_second": 7.888, "eval_steps_per_second": 0.247, "step": 500 }, { "epoch": 0.84, "learning_rate": 3.2234432234432236e-07, "logits/generated": -2.1318891048431396, "logits/real": -2.373379707336426, "logps/generated": -664.7022705078125, "logps/real": -426.9652404785156, "loss": 0.0487, "rewards/accuracies": 0.96875, "rewards/generated": -20.184797286987305, "rewards/margins": 16.790945053100586, "rewards/real": -3.393852949142456, "step": 510 }, { "epoch": 0.86, "learning_rate": 3.1776556776556773e-07, "logits/generated": -2.1151123046875, "logits/real": -2.3613884449005127, "logps/generated": -723.1424560546875, "logps/real": -472.94879150390625, "loss": 0.0227, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.221763610839844, "rewards/margins": 17.593347549438477, "rewards/real": -3.6284186840057373, "step": 520 }, { "epoch": 0.86, "eval_logits/generated": -2.214646100997925, "eval_logits/real": -2.409980297088623, "eval_logps/generated": -655.4663696289062, "eval_logps/real": -463.1470947265625, "eval_loss": 0.05429470166563988, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/generated": -20.113121032714844, "eval_rewards/margins": 15.532620429992676, "eval_rewards/real": -4.580500602722168, "eval_runtime": 546.7755, "eval_samples_per_second": 7.897, "eval_steps_per_second": 0.247, "step": 525 }, { "epoch": 0.87, "learning_rate": 3.1318681318681316e-07, "logits/generated": -2.1360554695129395, "logits/real": -2.357234001159668, "logps/generated": -686.1627197265625, "logps/real": -463.78594970703125, "loss": 0.1081, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.75899887084961, "rewards/margins": 17.28326416015625, "rewards/real": -4.475732803344727, "step": 530 }, { "epoch": 0.89, "learning_rate": 3.086080586080586e-07, "logits/generated": -2.198076009750366, "logits/real": -2.3724374771118164, "logps/generated": -632.1812133789062, "logps/real": -408.17999267578125, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/generated": -17.84307098388672, "rewards/margins": 14.931114196777344, "rewards/real": -2.911957263946533, "step": 540 }, { "epoch": 0.91, "learning_rate": 3.0402930402930397e-07, "logits/generated": -2.0505592823028564, "logits/real": -2.2820277214050293, "logps/generated": -675.8702392578125, "logps/real": -435.8621520996094, "loss": 0.0299, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -21.609846115112305, "rewards/margins": 17.412473678588867, "rewards/real": -4.197373390197754, "step": 550 }, { "epoch": 0.91, "eval_logits/generated": -2.055241823196411, "eval_logits/real": -2.3300585746765137, "eval_logps/generated": -658.2036743164062, "eval_logps/real": -460.3627014160156, "eval_loss": 0.04806026816368103, "eval_rewards/accuracies": 0.9731481671333313, "eval_rewards/generated": -20.386852264404297, "eval_rewards/margins": 16.084793090820312, "eval_rewards/real": -4.302060604095459, "eval_runtime": 545.5178, "eval_samples_per_second": 7.915, "eval_steps_per_second": 0.247, "step": 550 }, { "epoch": 0.92, "learning_rate": 2.994505494505494e-07, "logits/generated": -2.05330491065979, "logits/real": -2.319866418838501, "logps/generated": -671.1466674804688, "logps/real": -431.24298095703125, "loss": 0.0253, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.11385154724121, "rewards/margins": 16.92177963256836, "rewards/real": -4.1920695304870605, "step": 560 }, { "epoch": 0.94, "learning_rate": 2.948717948717949e-07, "logits/generated": -1.8845161199569702, "logits/real": -2.2347493171691895, "logps/generated": -694.9982299804688, "logps/real": -455.690185546875, "loss": 0.0218, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -24.400421142578125, "rewards/margins": 19.314403533935547, "rewards/real": -5.086018085479736, "step": 570 }, { "epoch": 0.95, "eval_logits/generated": -1.9225108623504639, "eval_logits/real": -2.2634739875793457, "eval_logps/generated": -657.9219970703125, "eval_logps/real": -461.96160888671875, "eval_loss": 0.04638593643903732, "eval_rewards/accuracies": 0.9712963104248047, "eval_rewards/generated": -20.358694076538086, "eval_rewards/margins": 15.896740913391113, "eval_rewards/real": -4.461949348449707, "eval_runtime": 549.2313, "eval_samples_per_second": 7.862, "eval_steps_per_second": 0.246, "step": 575 }, { "epoch": 0.95, "learning_rate": 2.902930402930403e-07, "logits/generated": -1.9606196880340576, "logits/real": -2.286708354949951, "logps/generated": -673.0288696289062, "logps/real": -418.47332763671875, "loss": 0.0254, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.76241683959961, "rewards/margins": 18.28946304321289, "rewards/real": -3.472952365875244, "step": 580 }, { "epoch": 0.97, "learning_rate": 2.857142857142857e-07, "logits/generated": -1.8293163776397705, "logits/real": -2.22106671333313, "logps/generated": -645.2640380859375, "logps/real": -431.3738708496094, "loss": 0.0245, "rewards/accuracies": 0.981249988079071, "rewards/generated": -22.095638275146484, "rewards/margins": 17.28582000732422, "rewards/real": -4.8098225593566895, "step": 590 }, { "epoch": 0.99, "learning_rate": 2.811355311355311e-07, "logits/generated": -1.9109601974487305, "logits/real": -2.295834541320801, "logps/generated": -623.3287963867188, "logps/real": -419.62939453125, "loss": 0.0218, "rewards/accuracies": 0.96875, "rewards/generated": -19.795055389404297, "rewards/margins": 15.8367280960083, "rewards/real": -3.9583258628845215, "step": 600 }, { "epoch": 0.99, "eval_logits/generated": -1.9517701864242554, "eval_logits/real": -2.2964491844177246, "eval_logps/generated": -664.1465454101562, "eval_logps/real": -470.55169677734375, "eval_loss": 0.04514331370592117, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/generated": -20.981136322021484, "eval_rewards/margins": 15.660176277160645, "eval_rewards/real": -5.320960521697998, "eval_runtime": 544.3521, "eval_samples_per_second": 7.932, "eval_steps_per_second": 0.248, "step": 600 }, { "epoch": 1.0, "learning_rate": 2.7655677655677655e-07, "logits/generated": -1.8589156866073608, "logits/real": -2.271904468536377, "logps/generated": -683.95654296875, "logps/real": -455.6640625, "loss": 0.0219, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -22.11398696899414, "rewards/margins": 17.55306053161621, "rewards/real": -4.560924530029297, "step": 610 }, { "epoch": 1.02, "learning_rate": 2.7197802197802193e-07, "logits/generated": -1.9177764654159546, "logits/real": -2.2215471267700195, "logps/generated": -666.0201416015625, "logps/real": -429.0519104003906, "loss": 0.0093, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -18.098499298095703, "rewards/margins": 15.7960205078125, "rewards/real": -2.302478790283203, "step": 620 }, { "epoch": 1.03, "eval_logits/generated": -1.7574790716171265, "eval_logits/real": -2.170833110809326, "eval_logps/generated": -647.0514526367188, "eval_logps/real": -460.7373962402344, "eval_loss": 0.042933978140354156, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/generated": -19.27163314819336, "eval_rewards/margins": 14.932106971740723, "eval_rewards/real": -4.3395256996154785, "eval_runtime": 542.4468, "eval_samples_per_second": 7.96, "eval_steps_per_second": 0.249, "step": 625 }, { "epoch": 1.04, "learning_rate": 2.6739926739926736e-07, "logits/generated": -1.8621714115142822, "logits/real": -2.1959869861602783, "logps/generated": -680.0298461914062, "logps/real": -468.26580810546875, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/generated": -19.896984100341797, "rewards/margins": 16.92976951599121, "rewards/real": -2.9672141075134277, "step": 630 }, { "epoch": 1.05, "learning_rate": 2.628205128205128e-07, "logits/generated": -2.157670736312866, "logits/real": -2.3523807525634766, "logps/generated": -636.9368286132812, "logps/real": -439.37841796875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -15.98005199432373, "rewards/margins": 13.84521198272705, "rewards/real": -2.1348421573638916, "step": 640 }, { "epoch": 1.07, "learning_rate": 2.582417582417583e-07, "logits/generated": -1.9907798767089844, "logits/real": -2.246988534927368, "logps/generated": -686.9197387695312, "logps/real": -434.936767578125, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/generated": -20.37822914123535, "rewards/margins": 17.642370223999023, "rewards/real": -2.7358591556549072, "step": 650 }, { "epoch": 1.07, "eval_logits/generated": -1.8154959678649902, "eval_logits/real": -2.1757171154022217, "eval_logps/generated": -645.0802001953125, "eval_logps/real": -458.6592712402344, "eval_loss": 0.049214281141757965, "eval_rewards/accuracies": 0.970370352268219, "eval_rewards/generated": -19.07451057434082, "eval_rewards/margins": 14.942792892456055, "eval_rewards/real": -4.131715297698975, "eval_runtime": 543.2173, "eval_samples_per_second": 7.949, "eval_steps_per_second": 0.249, "step": 650 }, { "epoch": 1.09, "learning_rate": 2.5366300366300365e-07, "logits/generated": -1.7823787927627563, "logits/real": -2.136547565460205, "logps/generated": -721.7634887695312, "logps/real": -412.015625, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.206735610961914, "rewards/margins": 20.0247745513916, "rewards/real": -4.181961536407471, "step": 660 }, { "epoch": 1.1, "learning_rate": 2.490842490842491e-07, "logits/generated": -1.7356059551239014, "logits/real": -2.120824098587036, "logps/generated": -722.6532592773438, "logps/real": -429.0586853027344, "loss": 0.0059, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -25.548004150390625, "rewards/margins": 20.72186851501465, "rewards/real": -4.826132774353027, "step": 670 }, { "epoch": 1.11, "eval_logits/generated": -1.6843816041946411, "eval_logits/real": -2.1123154163360596, "eval_logps/generated": -685.91259765625, "eval_logps/real": -474.67840576171875, "eval_loss": 0.04492348060011864, "eval_rewards/accuracies": 0.9712963104248047, "eval_rewards/generated": -23.157739639282227, "eval_rewards/margins": 17.424118041992188, "eval_rewards/real": -5.733626365661621, "eval_runtime": 538.5314, "eval_samples_per_second": 8.018, "eval_steps_per_second": 0.251, "step": 675 }, { "epoch": 1.12, "learning_rate": 2.4450549450549446e-07, "logits/generated": -1.6796834468841553, "logits/real": -2.1368143558502197, "logps/generated": -713.38916015625, "logps/real": -448.4339904785156, "loss": 0.0073, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -26.294750213623047, "rewards/margins": 21.05908966064453, "rewards/real": -5.235660552978516, "step": 680 }, { "epoch": 1.14, "learning_rate": 2.3992673992673995e-07, "logits/generated": -1.6848552227020264, "logits/real": -2.062554121017456, "logps/generated": -727.1676025390625, "logps/real": -474.197998046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -26.072521209716797, "rewards/margins": 20.835004806518555, "rewards/real": -5.23751974105835, "step": 690 }, { "epoch": 1.15, "learning_rate": 2.3534798534798532e-07, "logits/generated": -1.83333420753479, "logits/real": -2.158902406692505, "logps/generated": -751.34423828125, "logps/real": -447.7481994628906, "loss": 0.0149, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -26.157222747802734, "rewards/margins": 21.213214874267578, "rewards/real": -4.944005966186523, "step": 700 }, { "epoch": 1.15, "eval_logits/generated": -2.014181613922119, "eval_logits/real": -2.2748398780822754, "eval_logps/generated": -716.32373046875, "eval_logps/real": -488.8265686035156, "eval_loss": 0.060765769332647324, "eval_rewards/accuracies": 0.9712963104248047, "eval_rewards/generated": -26.19886016845703, "eval_rewards/margins": 19.0504150390625, "eval_rewards/real": -7.1484456062316895, "eval_runtime": 546.2873, "eval_samples_per_second": 7.904, "eval_steps_per_second": 0.247, "step": 700 }, { "epoch": 1.17, "learning_rate": 2.3076923076923078e-07, "logits/generated": -2.009290933609009, "logits/real": -2.248413562774658, "logps/generated": -723.00830078125, "logps/real": -485.6675720214844, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/generated": -26.66790771484375, "rewards/margins": 20.04497718811035, "rewards/real": -6.622932434082031, "step": 710 }, { "epoch": 1.19, "learning_rate": 2.2619047619047619e-07, "logits/generated": -2.0311272144317627, "logits/real": -2.2884433269500732, "logps/generated": -705.41259765625, "logps/real": -439.14654541015625, "loss": 0.0105, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -25.88492202758789, "rewards/margins": 21.01565170288086, "rewards/real": -4.869270324707031, "step": 720 }, { "epoch": 1.19, "eval_logits/generated": -2.1674070358276367, "eval_logits/real": -2.3962087631225586, "eval_logps/generated": -656.84765625, "eval_logps/real": -462.290283203125, "eval_loss": 0.04788418486714363, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/generated": -20.25126075744629, "eval_rewards/margins": 15.756444931030273, "eval_rewards/real": -4.494814872741699, "eval_runtime": 549.4973, "eval_samples_per_second": 7.858, "eval_steps_per_second": 0.246, "step": 725 }, { "epoch": 1.2, "learning_rate": 2.216117216117216e-07, "logits/generated": -2.1520206928253174, "logits/real": -2.384286403656006, "logps/generated": -693.1768188476562, "logps/real": -427.69384765625, "loss": 0.0129, "rewards/accuracies": 0.981249988079071, "rewards/generated": -21.8217830657959, "rewards/margins": 18.449357986450195, "rewards/real": -3.3724265098571777, "step": 730 }, { "epoch": 1.22, "learning_rate": 2.1703296703296702e-07, "logits/generated": -2.164816379547119, "logits/real": -2.4095542430877686, "logps/generated": -731.2252807617188, "logps/real": -431.8036193847656, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/generated": -22.88985252380371, "rewards/margins": 19.35944938659668, "rewards/real": -3.5304055213928223, "step": 740 }, { "epoch": 1.23, "learning_rate": 2.1245421245421245e-07, "logits/generated": -2.3042044639587402, "logits/real": -2.4798831939697266, "logps/generated": -710.3934326171875, "logps/real": -447.3128967285156, "loss": 0.032, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.082233428955078, "rewards/margins": 17.938434600830078, "rewards/real": -4.143797874450684, "step": 750 }, { "epoch": 1.23, "eval_logits/generated": -2.2425506114959717, "eval_logits/real": -2.4413764476776123, "eval_logps/generated": -667.56494140625, "eval_logps/real": -468.2917175292969, "eval_loss": 0.05120517686009407, "eval_rewards/accuracies": 0.9685184955596924, "eval_rewards/generated": -21.322982788085938, "eval_rewards/margins": 16.22801971435547, "eval_rewards/real": -5.094962120056152, "eval_runtime": 548.9256, "eval_samples_per_second": 7.866, "eval_steps_per_second": 0.246, "step": 750 }, { "epoch": 1.25, "learning_rate": 2.0787545787545788e-07, "logits/generated": -2.3006973266601562, "logits/real": -2.4579455852508545, "logps/generated": -678.5315551757812, "logps/real": -444.89593505859375, "loss": 0.0108, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.485198974609375, "rewards/margins": 17.16245460510254, "rewards/real": -3.322744846343994, "step": 760 }, { "epoch": 1.27, "learning_rate": 2.0329670329670329e-07, "logits/generated": -2.276136875152588, "logits/real": -2.4627890586853027, "logps/generated": -626.4572143554688, "logps/real": -460.61090087890625, "loss": 0.0042, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -18.86962890625, "rewards/margins": 15.991884231567383, "rewards/real": -2.8777451515197754, "step": 770 }, { "epoch": 1.28, "eval_logits/generated": -2.215578317642212, "eval_logits/real": -2.4378671646118164, "eval_logps/generated": -646.9547729492188, "eval_logps/real": -457.63812255859375, "eval_loss": 0.04618338495492935, "eval_rewards/accuracies": 0.970370352268219, "eval_rewards/generated": -19.261966705322266, "eval_rewards/margins": 15.232365608215332, "eval_rewards/real": -4.029602527618408, "eval_runtime": 545.4173, "eval_samples_per_second": 7.917, "eval_steps_per_second": 0.248, "step": 775 }, { "epoch": 1.28, "learning_rate": 1.987179487179487e-07, "logits/generated": -2.1666736602783203, "logits/real": -2.430644989013672, "logps/generated": -617.3143310546875, "logps/real": -455.05450439453125, "loss": 0.0181, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -19.36526870727539, "rewards/margins": 16.152751922607422, "rewards/real": -3.2125182151794434, "step": 780 }, { "epoch": 1.3, "learning_rate": 1.9413919413919415e-07, "logits/generated": -2.1149790287017822, "logits/real": -2.3885388374328613, "logps/generated": -665.8757934570312, "logps/real": -415.7660217285156, "loss": 0.0105, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -21.40970230102539, "rewards/margins": 19.079601287841797, "rewards/real": -2.3301024436950684, "step": 790 }, { "epoch": 1.32, "learning_rate": 1.8956043956043955e-07, "logits/generated": -2.172045946121216, "logits/real": -2.39658784866333, "logps/generated": -646.7432861328125, "logps/real": -423.2108459472656, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -20.290569305419922, "rewards/margins": 17.622339248657227, "rewards/real": -2.6682305335998535, "step": 800 }, { "epoch": 1.32, "eval_logits/generated": -2.132990598678589, "eval_logits/real": -2.384284019470215, "eval_logps/generated": -652.7453002929688, "eval_logps/real": -457.6903381347656, "eval_loss": 0.04753004014492035, "eval_rewards/accuracies": 0.9731481671333313, "eval_rewards/generated": -19.841014862060547, "eval_rewards/margins": 15.806192398071289, "eval_rewards/real": -4.034823894500732, "eval_runtime": 542.7194, "eval_samples_per_second": 7.956, "eval_steps_per_second": 0.249, "step": 800 }, { "epoch": 1.33, "learning_rate": 1.8498168498168498e-07, "logits/generated": -2.2161784172058105, "logits/real": -2.3784358501434326, "logps/generated": -677.4197998046875, "logps/real": -415.10162353515625, "loss": 0.0139, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.94455909729004, "rewards/margins": 18.23735809326172, "rewards/real": -2.707200527191162, "step": 810 }, { "epoch": 1.35, "learning_rate": 1.8040293040293039e-07, "logits/generated": -2.162409782409668, "logits/real": -2.4266014099121094, "logps/generated": -634.3106689453125, "logps/real": -426.293212890625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -19.84092140197754, "rewards/margins": 17.243751525878906, "rewards/real": -2.5971689224243164, "step": 820 }, { "epoch": 1.36, "eval_logits/generated": -2.1121842861175537, "eval_logits/real": -2.371814012527466, "eval_logps/generated": -662.919189453125, "eval_logps/real": -462.037841796875, "eval_loss": 0.042767442762851715, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/generated": -20.858409881591797, "eval_rewards/margins": 16.388830184936523, "eval_rewards/real": -4.469577789306641, "eval_runtime": 539.1341, "eval_samples_per_second": 8.009, "eval_steps_per_second": 0.25, "step": 825 }, { "epoch": 1.37, "learning_rate": 1.7582417582417584e-07, "logits/generated": -2.063286304473877, "logits/real": -2.3742778301239014, "logps/generated": -688.5067138671875, "logps/real": -435.5904846191406, "loss": 0.0075, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.731130599975586, "rewards/margins": 19.344463348388672, "rewards/real": -3.3866665363311768, "step": 830 }, { "epoch": 1.38, "learning_rate": 1.7124542124542125e-07, "logits/generated": -1.8474409580230713, "logits/real": -2.223342180252075, "logps/generated": -727.342041015625, "logps/real": -428.4285583496094, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -27.19036293029785, "rewards/margins": 22.461837768554688, "rewards/real": -4.728522300720215, "step": 840 }, { "epoch": 1.4, "learning_rate": 1.6666666666666665e-07, "logits/generated": -1.7657016515731812, "logits/real": -2.246410369873047, "logps/generated": -764.67236328125, "logps/real": -466.6670837402344, "loss": 0.004, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -28.994009017944336, "rewards/margins": 23.707733154296875, "rewards/real": -5.286276817321777, "step": 850 }, { "epoch": 1.4, "eval_logits/generated": -1.7239781618118286, "eval_logits/real": -2.1708648204803467, "eval_logps/generated": -710.6077880859375, "eval_logps/real": -480.1641845703125, "eval_loss": 0.04679808393120766, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/generated": -25.62726593017578, "eval_rewards/margins": 19.34505844116211, "eval_rewards/real": -6.282209396362305, "eval_runtime": 539.6647, "eval_samples_per_second": 8.001, "eval_steps_per_second": 0.25, "step": 850 }, { "epoch": 1.42, "learning_rate": 1.6208791208791208e-07, "logits/generated": -1.5984615087509155, "logits/real": -2.1419482231140137, "logps/generated": -743.21337890625, "logps/real": -483.67608642578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -27.930688858032227, "rewards/margins": 22.560997009277344, "rewards/real": -5.369691371917725, "step": 860 }, { "epoch": 1.43, "learning_rate": 1.5750915750915748e-07, "logits/generated": -1.7401374578475952, "logits/real": -2.2165982723236084, "logps/generated": -744.0372314453125, "logps/real": -467.1297912597656, "loss": 0.0222, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.236160278320312, "rewards/margins": 20.77927017211914, "rewards/real": -5.456892013549805, "step": 870 }, { "epoch": 1.44, "eval_logits/generated": -1.6543631553649902, "eval_logits/real": -2.1242499351501465, "eval_logps/generated": -685.1132202148438, "eval_logps/real": -477.74078369140625, "eval_loss": 0.05835163965821266, "eval_rewards/accuracies": 0.9759259223937988, "eval_rewards/generated": -23.077802658081055, "eval_rewards/margins": 17.037935256958008, "eval_rewards/real": -6.039866924285889, "eval_runtime": 542.9056, "eval_samples_per_second": 7.954, "eval_steps_per_second": 0.249, "step": 875 }, { "epoch": 1.45, "learning_rate": 1.5293040293040294e-07, "logits/generated": -1.6089227199554443, "logits/real": -2.1636829376220703, "logps/generated": -724.6553344726562, "logps/real": -462.1668395996094, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/generated": -25.3486385345459, "rewards/margins": 20.25618553161621, "rewards/real": -5.0924506187438965, "step": 880 }, { "epoch": 1.47, "learning_rate": 1.4835164835164835e-07, "logits/generated": -1.762058973312378, "logits/real": -2.180694818496704, "logps/generated": -734.1724853515625, "logps/real": -474.927978515625, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/generated": -24.476972579956055, "rewards/margins": 20.52760124206543, "rewards/real": -3.949371814727783, "step": 890 }, { "epoch": 1.48, "learning_rate": 1.4377289377289375e-07, "logits/generated": -1.8223994970321655, "logits/real": -2.230008602142334, "logps/generated": -674.3191528320312, "logps/real": -437.16448974609375, "loss": 0.0063, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -22.358070373535156, "rewards/margins": 19.133329391479492, "rewards/real": -3.2247397899627686, "step": 900 }, { "epoch": 1.48, "eval_logits/generated": -1.7696380615234375, "eval_logits/real": -2.202569007873535, "eval_logps/generated": -652.3550415039062, "eval_logps/real": -456.0634765625, "eval_loss": 0.04898802191019058, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/generated": -19.801984786987305, "eval_rewards/margins": 15.92984676361084, "eval_rewards/real": -3.8721377849578857, "eval_runtime": 541.3411, "eval_samples_per_second": 7.976, "eval_steps_per_second": 0.249, "step": 900 }, { "epoch": 1.5, "learning_rate": 1.3919413919413918e-07, "logits/generated": -1.7800897359848022, "logits/real": -2.215033531188965, "logps/generated": -680.4356689453125, "logps/real": -436.82257080078125, "loss": 0.0095, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.27292251586914, "rewards/margins": 17.82526969909668, "rewards/real": -3.4476523399353027, "step": 910 }, { "epoch": 1.51, "learning_rate": 1.346153846153846e-07, "logits/generated": -1.6774091720581055, "logits/real": -2.172245740890503, "logps/generated": -713.2435302734375, "logps/real": -457.10125732421875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -23.64259147644043, "rewards/margins": 19.829133987426758, "rewards/real": -3.813458204269409, "step": 920 }, { "epoch": 1.52, "eval_logits/generated": -1.6461421251296997, "eval_logits/real": -2.1238648891448975, "eval_logps/generated": -691.8392333984375, "eval_logps/real": -470.1639404296875, "eval_loss": 0.04775296524167061, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/generated": -23.750408172607422, "eval_rewards/margins": 18.46822738647461, "eval_rewards/real": -5.282179355621338, "eval_runtime": 545.1425, "eval_samples_per_second": 7.921, "eval_steps_per_second": 0.248, "step": 925 }, { "epoch": 1.53, "learning_rate": 1.3003663003663004e-07, "logits/generated": -1.691014051437378, "logits/real": -2.090836763381958, "logps/generated": -717.5584716796875, "logps/real": -426.21075439453125, "loss": 0.0146, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.57354736328125, "rewards/margins": 20.423908233642578, "rewards/real": -4.1496381759643555, "step": 930 }, { "epoch": 1.55, "learning_rate": 1.2545787545787545e-07, "logits/generated": -1.7866837978363037, "logits/real": -2.1096789836883545, "logps/generated": -695.7778930664062, "logps/real": -430.5741271972656, "loss": 0.0098, "rewards/accuracies": 0.981249988079071, "rewards/generated": -23.648204803466797, "rewards/margins": 19.338354110717773, "rewards/real": -4.30985164642334, "step": 940 }, { "epoch": 1.56, "learning_rate": 1.2087912087912088e-07, "logits/generated": -1.6880855560302734, "logits/real": -2.137526512145996, "logps/generated": -677.9566040039062, "logps/real": -424.09771728515625, "loss": 0.0169, "rewards/accuracies": 0.981249988079071, "rewards/generated": -23.116125106811523, "rewards/margins": 19.75508689880371, "rewards/real": -3.3610382080078125, "step": 950 }, { "epoch": 1.56, "eval_logits/generated": -1.6890491247177124, "eval_logits/real": -2.1447362899780273, "eval_logps/generated": -683.7665405273438, "eval_logps/real": -466.7169189453125, "eval_loss": 0.045488789677619934, "eval_rewards/accuracies": 0.9731481671333313, "eval_rewards/generated": -22.943147659301758, "eval_rewards/margins": 18.005666732788086, "eval_rewards/real": -4.937481880187988, "eval_runtime": 541.292, "eval_samples_per_second": 7.977, "eval_steps_per_second": 0.249, "step": 950 }, { "epoch": 1.58, "learning_rate": 1.163003663003663e-07, "logits/generated": -1.7114064693450928, "logits/real": -2.16520094871521, "logps/generated": -719.1756591796875, "logps/real": -456.89697265625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -25.02173614501953, "rewards/margins": 20.79627227783203, "rewards/real": -4.225462913513184, "step": 960 }, { "epoch": 1.6, "learning_rate": 1.1172161172161172e-07, "logits/generated": -1.6347287893295288, "logits/real": -2.1204426288604736, "logps/generated": -683.4697875976562, "logps/real": -454.54461669921875, "loss": 0.0063, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.318696975708008, "rewards/margins": 19.61166763305664, "rewards/real": -4.707026958465576, "step": 970 }, { "epoch": 1.6, "eval_logits/generated": -1.5889698266983032, "eval_logits/real": -2.077850818634033, "eval_logps/generated": -704.8994140625, "eval_logps/real": -477.1242370605469, "eval_loss": 0.04485413804650307, "eval_rewards/accuracies": 0.9740740656852722, "eval_rewards/generated": -25.056419372558594, "eval_rewards/margins": 19.078208923339844, "eval_rewards/real": -5.978213787078857, "eval_runtime": 545.2881, "eval_samples_per_second": 7.919, "eval_steps_per_second": 0.248, "step": 975 }, { "epoch": 1.61, "learning_rate": 1.0714285714285713e-07, "logits/generated": -1.6600666046142578, "logits/real": -2.114758014678955, "logps/generated": -709.62890625, "logps/real": -427.54022216796875, "loss": 0.0122, "rewards/accuracies": 0.981249988079071, "rewards/generated": -24.828792572021484, "rewards/margins": 20.330127716064453, "rewards/real": -4.498665809631348, "step": 980 }, { "epoch": 1.63, "learning_rate": 1.0256410256410256e-07, "logits/generated": -1.5314631462097168, "logits/real": -2.0638275146484375, "logps/generated": -710.69482421875, "logps/real": -447.7142028808594, "loss": 0.0176, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -25.278757095336914, "rewards/margins": 20.97730255126953, "rewards/real": -4.301451683044434, "step": 990 }, { "epoch": 1.65, "learning_rate": 9.798534798534798e-08, "logits/generated": -1.659684181213379, "logits/real": -2.0716404914855957, "logps/generated": -747.033203125, "logps/real": -457.4671936035156, "loss": 0.0144, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.58378028869629, "rewards/margins": 22.331199645996094, "rewards/real": -4.2525811195373535, "step": 1000 }, { "epoch": 1.65, "eval_logits/generated": -1.626175880432129, "eval_logits/real": -2.085879325866699, "eval_logps/generated": -683.6390991210938, "eval_logps/real": -469.9638671875, "eval_loss": 0.04278276115655899, "eval_rewards/accuracies": 0.9731481671333313, "eval_rewards/generated": -22.93039894104004, "eval_rewards/margins": 17.668224334716797, "eval_rewards/real": -5.262173175811768, "eval_runtime": 544.8826, "eval_samples_per_second": 7.925, "eval_steps_per_second": 0.248, "step": 1000 }, { "epoch": 1.66, "learning_rate": 9.340659340659341e-08, "logits/generated": -1.5913441181182861, "logits/real": -2.1010706424713135, "logps/generated": -683.9640502929688, "logps/real": -461.71038818359375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -23.161916732788086, "rewards/margins": 19.142833709716797, "rewards/real": -4.0190839767456055, "step": 1010 }, { "epoch": 1.68, "learning_rate": 8.882783882783882e-08, "logits/generated": -1.6814014911651611, "logits/real": -2.1148276329040527, "logps/generated": -701.2926025390625, "logps/real": -449.57421875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -24.634958267211914, "rewards/margins": 20.201641082763672, "rewards/real": -4.433315753936768, "step": 1020 }, { "epoch": 1.69, "eval_logits/generated": -1.607029914855957, "eval_logits/real": -2.093400478363037, "eval_logps/generated": -695.1799926757812, "eval_logps/real": -472.4885559082031, "eval_loss": 0.04106166213750839, "eval_rewards/accuracies": 0.9759259223937988, "eval_rewards/generated": -24.0844783782959, "eval_rewards/margins": 18.56983757019043, "eval_rewards/real": -5.514641284942627, "eval_runtime": 541.5376, "eval_samples_per_second": 7.974, "eval_steps_per_second": 0.249, "step": 1025 }, { "epoch": 1.7, "learning_rate": 8.424908424908425e-08, "logits/generated": -1.6199986934661865, "logits/real": -2.0736172199249268, "logps/generated": -728.6807861328125, "logps/real": -431.51824951171875, "loss": 0.0066, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -25.239320755004883, "rewards/margins": 21.229106903076172, "rewards/real": -4.0102128982543945, "step": 1030 }, { "epoch": 1.71, "learning_rate": 7.967032967032966e-08, "logits/generated": -1.6336625814437866, "logits/real": -2.1006054878234863, "logps/generated": -734.5277709960938, "logps/real": -478.76446533203125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -26.6556339263916, "rewards/margins": 21.211820602416992, "rewards/real": -5.443814277648926, "step": 1040 }, { "epoch": 1.73, "learning_rate": 7.509157509157509e-08, "logits/generated": -1.649921178817749, "logits/real": -2.090855598449707, "logps/generated": -750.3670043945312, "logps/real": -452.450927734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -26.29367446899414, "rewards/margins": 22.180587768554688, "rewards/real": -4.113083362579346, "step": 1050 }, { "epoch": 1.73, "eval_logits/generated": -1.6779299974441528, "eval_logits/real": -2.1277213096618652, "eval_logps/generated": -691.9456787109375, "eval_logps/real": -471.51629638671875, "eval_loss": 0.04077158868312836, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/generated": -23.761049270629883, "eval_rewards/margins": 18.3436336517334, "eval_rewards/real": -5.417417049407959, "eval_runtime": 539.3269, "eval_samples_per_second": 8.006, "eval_steps_per_second": 0.25, "step": 1050 }, { "epoch": 1.74, "learning_rate": 7.051282051282051e-08, "logits/generated": -1.722020149230957, "logits/real": -2.128837823867798, "logps/generated": -728.57861328125, "logps/real": -456.64312744140625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -25.133262634277344, "rewards/margins": 21.077022552490234, "rewards/real": -4.0562424659729, "step": 1060 }, { "epoch": 1.76, "learning_rate": 6.593406593406594e-08, "logits/generated": -1.723589539527893, "logits/real": -2.129660129547119, "logps/generated": -737.1346435546875, "logps/real": -446.6085510253906, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -26.5841064453125, "rewards/margins": 21.89805030822754, "rewards/real": -4.686056137084961, "step": 1070 }, { "epoch": 1.77, "eval_logits/generated": -1.704836130142212, "eval_logits/real": -2.1411664485931396, "eval_logps/generated": -699.8467407226562, "eval_logps/real": -474.1795654296875, "eval_loss": 0.04111822694540024, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/generated": -24.55116081237793, "eval_rewards/margins": 18.867414474487305, "eval_rewards/real": -5.683747291564941, "eval_runtime": 540.5677, "eval_samples_per_second": 7.988, "eval_steps_per_second": 0.25, "step": 1075 }, { "epoch": 1.78, "learning_rate": 6.135531135531135e-08, "logits/generated": -1.6880667209625244, "logits/real": -2.135887861251831, "logps/generated": -704.2137451171875, "logps/real": -462.7354431152344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -25.7542724609375, "rewards/margins": 20.865833282470703, "rewards/real": -4.88844108581543, "step": 1080 }, { "epoch": 1.79, "learning_rate": 5.677655677655677e-08, "logits/generated": -1.630059003829956, "logits/real": -2.092768430709839, "logps/generated": -720.8707275390625, "logps/real": -457.188720703125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -26.587078094482422, "rewards/margins": 21.475454330444336, "rewards/real": -5.1116228103637695, "step": 1090 }, { "epoch": 1.81, "learning_rate": 5.2197802197802196e-08, "logits/generated": -1.6180572509765625, "logits/real": -2.073620080947876, "logps/generated": -702.1611328125, "logps/real": -436.5894470214844, "loss": 0.0077, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.184375762939453, "rewards/margins": 21.657176971435547, "rewards/real": -4.52719783782959, "step": 1100 }, { "epoch": 1.81, "eval_logits/generated": -1.6256543397903442, "eval_logits/real": -2.091676950454712, "eval_logps/generated": -707.8128662109375, "eval_logps/real": -476.0543212890625, "eval_loss": 0.04039894789457321, "eval_rewards/accuracies": 0.9759259223937988, "eval_rewards/generated": -25.347776412963867, "eval_rewards/margins": 19.476551055908203, "eval_rewards/real": -5.87122106552124, "eval_runtime": 541.5935, "eval_samples_per_second": 7.973, "eval_steps_per_second": 0.249, "step": 1100 }, { "epoch": 1.83, "learning_rate": 4.7619047619047613e-08, "logits/generated": -1.6621770858764648, "logits/real": -2.0922763347625732, "logps/generated": -746.7325439453125, "logps/real": -442.34857177734375, "loss": 0.0109, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -28.543167114257812, "rewards/margins": 23.869972229003906, "rewards/real": -4.6731953620910645, "step": 1110 }, { "epoch": 1.84, "learning_rate": 4.304029304029304e-08, "logits/generated": -1.6281875371932983, "logits/real": -2.095040798187256, "logps/generated": -751.2948608398438, "logps/real": -464.0716857910156, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/generated": -25.601943969726562, "rewards/margins": 21.863361358642578, "rewards/real": -3.738584041595459, "step": 1120 }, { "epoch": 1.85, "eval_logits/generated": -1.6509149074554443, "eval_logits/real": -2.1029398441314697, "eval_logps/generated": -686.7852783203125, "eval_logps/real": -468.099853515625, "eval_loss": 0.038483668118715286, "eval_rewards/accuracies": 0.9740740656852722, "eval_rewards/generated": -23.245014190673828, "eval_rewards/margins": 18.169240951538086, "eval_rewards/real": -5.075774192810059, "eval_runtime": 542.4541, "eval_samples_per_second": 7.96, "eval_steps_per_second": 0.249, "step": 1125 }, { "epoch": 1.86, "learning_rate": 3.846153846153846e-08, "logits/generated": -1.7731685638427734, "logits/real": -2.1262717247009277, "logps/generated": -718.3228149414062, "logps/real": -450.9098205566406, "loss": 0.018, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -23.984769821166992, "rewards/margins": 19.602436065673828, "rewards/real": -4.382335662841797, "step": 1130 }, { "epoch": 1.88, "learning_rate": 3.388278388278388e-08, "logits/generated": -1.7456114292144775, "logits/real": -2.1026391983032227, "logps/generated": -726.411376953125, "logps/real": -401.18524169921875, "loss": 0.0146, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -25.529359817504883, "rewards/margins": 21.635169982910156, "rewards/real": -3.8941879272460938, "step": 1140 }, { "epoch": 1.89, "learning_rate": 2.9304029304029303e-08, "logits/generated": -1.6194369792938232, "logits/real": -2.1418557167053223, "logps/generated": -741.9666748046875, "logps/real": -464.30682373046875, "loss": 0.0038, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -27.52840232849121, "rewards/margins": 22.9106388092041, "rewards/real": -4.617762565612793, "step": 1150 }, { "epoch": 1.89, "eval_logits/generated": -1.6736239194869995, "eval_logits/real": -2.1248714923858643, "eval_logps/generated": -689.571533203125, "eval_logps/real": -469.41937255859375, "eval_loss": 0.03756081312894821, "eval_rewards/accuracies": 0.9759259223937988, "eval_rewards/generated": -23.52364158630371, "eval_rewards/margins": 18.315916061401367, "eval_rewards/real": -5.20772647857666, "eval_runtime": 546.839, "eval_samples_per_second": 7.896, "eval_steps_per_second": 0.247, "step": 1150 }, { "epoch": 1.91, "learning_rate": 2.4725274725274723e-08, "logits/generated": -1.757741928100586, "logits/real": -2.1521947383880615, "logps/generated": -727.6856689453125, "logps/real": -433.1048889160156, "loss": 0.0049, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -25.25569725036621, "rewards/margins": 21.249250411987305, "rewards/real": -4.0064473152160645, "step": 1160 }, { "epoch": 1.93, "learning_rate": 2.0146520146520147e-08, "logits/generated": -1.6687755584716797, "logits/real": -2.1527109146118164, "logps/generated": -697.9734497070312, "logps/real": -447.568359375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/generated": -24.455238342285156, "rewards/margins": 20.623992919921875, "rewards/real": -3.831244945526123, "step": 1170 }, { "epoch": 1.93, "eval_logits/generated": -1.6968693733215332, "eval_logits/real": -2.138258218765259, "eval_logps/generated": -687.8193359375, "eval_logps/real": -468.58880615234375, "eval_loss": 0.03790770843625069, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/generated": -23.348421096801758, "eval_rewards/margins": 18.223752975463867, "eval_rewards/real": -5.124669075012207, "eval_runtime": 546.4967, "eval_samples_per_second": 7.901, "eval_steps_per_second": 0.247, "step": 1175 }, { "epoch": 1.94, "learning_rate": 1.5567765567765568e-08, "logits/generated": -1.6982448101043701, "logits/real": -2.162806987762451, "logps/generated": -685.9969482421875, "logps/real": -421.25787353515625, "loss": 0.0054, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -25.75900650024414, "rewards/margins": 22.124160766601562, "rewards/real": -3.63484525680542, "step": 1180 }, { "epoch": 1.96, "learning_rate": 1.098901098901099e-08, "logits/generated": -1.7311254739761353, "logits/real": -2.1759705543518066, "logps/generated": -742.8027954101562, "logps/real": -477.0604553222656, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -25.570262908935547, "rewards/margins": 21.3287410736084, "rewards/real": -4.2415266036987305, "step": 1190 }, { "epoch": 1.98, "learning_rate": 6.41025641025641e-09, "logits/generated": -1.7832437753677368, "logits/real": -2.145749568939209, "logps/generated": -705.14697265625, "logps/real": -451.7295837402344, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -24.158727645874023, "rewards/margins": 20.19052505493164, "rewards/real": -3.968203067779541, "step": 1200 }, { "epoch": 1.98, "eval_logits/generated": -1.6814604997634888, "eval_logits/real": -2.1279709339141846, "eval_logps/generated": -690.4514770507812, "eval_logps/real": -469.20892333984375, "eval_loss": 0.03795896843075752, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/generated": -23.611637115478516, "eval_rewards/margins": 18.424955368041992, "eval_rewards/real": -5.186681747436523, "eval_runtime": 540.8892, "eval_samples_per_second": 7.983, "eval_steps_per_second": 0.25, "step": 1200 }, { "epoch": 1.99, "learning_rate": 1.8315018315018314e-09, "logits/generated": -1.7156912088394165, "logits/real": -2.155310869216919, "logps/generated": -734.7672119140625, "logps/real": -468.51580810546875, "loss": 0.0047, "rewards/accuracies": 0.9937499761581421, "rewards/generated": -27.58634376525879, "rewards/margins": 23.325031280517578, "rewards/real": -4.261313438415527, "step": 1210 }, { "epoch": 2.0, "step": 1214, "total_flos": 0.0, "train_loss": 0.05120065249445122, "train_runtime": 45973.3661, "train_samples_per_second": 1.69, "train_steps_per_second": 0.026 } ], "logging_steps": 10, "max_steps": 1214, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }