{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9973890339425587, "eval_steps": 100.0, "global_step": 191, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -3.0979137420654297, "logits/oppo_generated": -3.0123190879821777, "logits/oppo_real": -3.0979137420654297, "logits/real": -3.0123190879821777, "logps/generated": -90.71572875976562, "logps/oppo_gen": -90.71572875976562, "logps/oppo_real": -483.66973876953125, "logps/real": -483.66973876953125, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.83146333694458, "logits/oppo_generated": -2.7920122146606445, "logits/oppo_real": -2.83146333694458, "logits/real": -2.7920122146606445, "logps/generated": -62.34805679321289, "logps/oppo_gen": -62.34805679321289, "logps/oppo_real": -294.31817626953125, "logps/real": -294.31817626953125, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 2 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.919513702392578, "logits/oppo_generated": -2.9197988510131836, "logits/oppo_real": -2.919513702392578, "logits/real": -2.9197988510131836, "logps/generated": -106.68229675292969, "logps/oppo_gen": -106.68229675292969, "logps/oppo_real": -366.2132873535156, "logps/real": -366.2132873535156, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 3 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -3.021902561187744, "logits/oppo_generated": -2.7693958282470703, "logits/oppo_real": -3.021902561187744, "logits/real": -2.7693958282470703, "logps/generated": -71.52165222167969, "logps/oppo_gen": -71.52165222167969, "logps/oppo_real": -280.28497314453125, "logps/real": -280.28497314453125, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 4 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.7911243438720703, "logits/oppo_generated": -2.798036575317383, "logits/oppo_real": -2.7911243438720703, "logits/real": -2.798036575317383, "logps/generated": -43.92365646362305, "logps/oppo_gen": -43.92365646362305, "logps/oppo_real": -143.5323944091797, "logps/real": -143.5323944091797, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 5 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -3.104882001876831, "logits/oppo_generated": -2.8853113651275635, "logits/oppo_real": -3.104882001876831, "logits/real": -2.8853113651275635, "logps/generated": -85.01286315917969, "logps/oppo_gen": -85.01286315917969, "logps/oppo_real": -311.1739196777344, "logps/real": -311.1739196777344, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 6 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -3.0711278915405273, "logits/oppo_generated": -3.0225138664245605, "logits/oppo_real": -3.0711278915405273, "logits/real": -3.0225138664245605, "logps/generated": -104.07185363769531, "logps/oppo_gen": -104.07185363769531, "logps/oppo_real": -375.48779296875, "logps/real": -375.48779296875, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 7 }, { "epoch": 0.04, "grad_norm": 135.4093691943988, "learning_rate": 1.6666666666666667e-08, "logits/generated": -2.9351305961608887, "logits/oppo_generated": -2.9404444694519043, "logits/oppo_real": -2.9351305961608887, "logits/real": -2.9404444694519043, "logps/generated": -94.80602264404297, "logps/oppo_gen": -94.80602264404297, "logps/oppo_real": -294.525146484375, "logps/real": -294.525146484375, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 8 }, { "epoch": 0.05, "grad_norm": 142.18188175713527, "learning_rate": 3.3333333333333334e-08, "logits/generated": -3.0981688499450684, "logits/oppo_generated": -2.8864831924438477, "logits/oppo_real": -3.0981688499450684, "logits/real": -2.8864831924438477, "logps/generated": -65.3975830078125, "logps/oppo_gen": -65.3975830078125, "logps/oppo_real": -312.339111328125, "logps/real": -312.339111328125, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 9 }, { "epoch": 0.05, "grad_norm": 139.81928207734796, "learning_rate": 5e-08, "logits/generated": -2.960458278656006, "logits/oppo_generated": -2.756359577178955, "logits/oppo_real": -2.960458278656006, "logits/real": -2.756359577178955, "logps/generated": -76.06861877441406, "logps/oppo_gen": -76.06861877441406, "logps/oppo_real": -265.8212890625, "logps/real": -265.8212890625, "loss": 5.3891, "loss/gen": 7.389056205749512, "loss/real": -2.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 10 }, { "epoch": 0.06, "grad_norm": 146.15293253130434, "learning_rate": 6.666666666666667e-08, "logits/generated": -2.5165090560913086, "logits/oppo_generated": -2.6119813919067383, "logits/oppo_real": -2.516786575317383, "logits/real": -2.611802101135254, "logps/generated": -118.57888793945312, "logps/oppo_gen": -118.53258514404297, "logps/oppo_real": -290.76776123046875, "logps/real": -290.7937927246094, "loss": 5.3858, "loss/gen": 7.385635852813721, "loss/real": -1.9997397661209106, "rewards/accuracies": 0.75, "rewards/generated": -0.04630112648010254, "rewards/margins": 0.020273208618164062, "rewards/real": -0.026027917861938477, "step": 11 }, { "epoch": 0.06, "grad_norm": 135.79117124283155, "learning_rate": 8.333333333333333e-08, "logits/generated": -3.1297383308410645, "logits/oppo_generated": -3.0934252738952637, "logits/oppo_real": -3.130321502685547, "logits/real": -3.0924453735351562, "logps/generated": -96.59043884277344, "logps/oppo_gen": -96.39014434814453, "logps/oppo_real": -432.87994384765625, "logps/real": -432.9808349609375, "loss": 5.3776, "loss/gen": 7.374272346496582, "loss/real": -1.9989911317825317, "rewards/accuracies": 0.75, "rewards/generated": -0.20029473304748535, "rewards/margins": 0.09940791130065918, "rewards/real": -0.10088682174682617, "step": 12 }, { "epoch": 0.07, "grad_norm": 138.10590224307938, "learning_rate": 1e-07, "logits/generated": -2.457440137863159, "logits/oppo_generated": -2.3860814571380615, "logits/oppo_real": -2.459930419921875, "logits/real": -2.383344888687134, "logps/generated": -67.26084899902344, "logps/oppo_gen": -66.88719940185547, "logps/oppo_real": -307.066650390625, "logps/real": -307.28009033203125, "loss": 5.3544, "loss/gen": 7.361501216888428, "loss/real": -1.9978655576705933, "rewards/accuracies": 0.875, "rewards/generated": -0.37365150451660156, "rewards/margins": 0.16021156311035156, "rewards/real": -0.21343994140625, "step": 13 }, { "epoch": 0.07, "grad_norm": 134.5610848160604, "learning_rate": 1.1666666666666667e-07, "logits/generated": -2.897860050201416, "logits/oppo_generated": -2.7236456871032715, "logits/oppo_real": -2.906421661376953, "logits/real": -2.7175283432006836, "logps/generated": -55.62891387939453, "logps/oppo_gen": -54.473785400390625, "logps/oppo_real": -214.07330322265625, "logps/real": -214.57391357421875, "loss": 5.3058, "loss/gen": 7.3042426109313965, "loss/real": -1.9949939250946045, "rewards/accuracies": 0.875, "rewards/generated": -1.155130386352539, "rewards/margins": 0.6545138359069824, "rewards/real": -0.5006165504455566, "step": 14 }, { "epoch": 0.08, "grad_norm": 143.7638580276698, "learning_rate": 1.3333333333333334e-07, "logits/generated": -2.9454569816589355, "logits/oppo_generated": -2.880186080932617, "logits/oppo_real": -2.954317092895508, "logits/real": -2.869965076446533, "logps/generated": -69.53794860839844, "logps/oppo_gen": -67.36585998535156, "logps/oppo_real": -272.5278015136719, "logps/real": -273.41131591796875, "loss": 5.2534, "loss/gen": 7.230417251586914, "loss/real": -1.9911651611328125, "rewards/accuracies": 1.0, "rewards/generated": -2.172096014022827, "rewards/margins": 1.2886085510253906, "rewards/real": -0.8834874629974365, "step": 15 }, { "epoch": 0.08, "grad_norm": 136.58602486162337, "learning_rate": 1.5e-07, "logits/generated": -2.9922404289245605, "logits/oppo_generated": -2.839021682739258, "logits/oppo_real": -3.0036399364471436, "logits/real": -2.8292860984802246, "logps/generated": -84.50007629394531, "logps/oppo_gen": -81.83857727050781, "logps/oppo_real": -431.7451171875, "logps/real": -432.54998779296875, "loss": 5.221, "loss/gen": 7.19518518447876, "loss/real": -1.9919514656066895, "rewards/accuracies": 1.0, "rewards/generated": -2.661508798599243, "rewards/margins": 1.8566477298736572, "rewards/real": -0.8048610687255859, "step": 16 }, { "epoch": 0.09, "grad_norm": 129.87295474809218, "learning_rate": 1.6666666666666665e-07, "logits/generated": -2.658919334411621, "logits/oppo_generated": -2.676953077316284, "logits/oppo_real": -2.681703805923462, "logits/real": -2.6546268463134766, "logps/generated": -79.03924560546875, "logps/oppo_gen": -73.95083618164062, "logps/oppo_real": -231.33941650390625, "logps/real": -233.62753295898438, "loss": 4.9962, "loss/gen": 7.023112773895264, "loss/real": -1.9771190881729126, "rewards/accuracies": 1.0, "rewards/generated": -5.088409423828125, "rewards/margins": 2.800309896469116, "rewards/real": -2.2880992889404297, "step": 17 }, { "epoch": 0.09, "grad_norm": 128.38777930550867, "learning_rate": 1.833333333333333e-07, "logits/generated": -3.0433835983276367, "logits/oppo_generated": -2.802915096282959, "logits/oppo_real": -3.083519458770752, "logits/real": -2.778608560562134, "logps/generated": -60.27662658691406, "logps/oppo_gen": -53.611244201660156, "logps/oppo_real": -254.54031372070312, "logps/real": -258.7552490234375, "loss": 4.9034, "loss/gen": 6.913381099700928, "loss/real": -1.957850694656372, "rewards/accuracies": 0.875, "rewards/generated": -6.66538143157959, "rewards/margins": 2.4504411220550537, "rewards/real": -4.214940547943115, "step": 18 }, { "epoch": 0.1, "grad_norm": 134.49697225913675, "learning_rate": 2e-07, "logits/generated": -3.050615072250366, "logits/oppo_generated": -2.9067916870117188, "logits/oppo_real": -3.0934958457946777, "logits/real": -2.865483283996582, "logps/generated": -97.78262329101562, "logps/oppo_gen": -87.72978210449219, "logps/oppo_real": -446.26251220703125, "logps/real": -449.0770263671875, "loss": 4.8312, "loss/gen": 6.683139324188232, "loss/real": -1.9718552827835083, "rewards/accuracies": 1.0, "rewards/generated": -10.052835464477539, "rewards/margins": 7.238361358642578, "rewards/real": -2.814474105834961, "step": 19 }, { "epoch": 0.1, "grad_norm": 132.29558967280428, "learning_rate": 2.1666666666666667e-07, "logits/generated": -2.8847241401672363, "logits/oppo_generated": -3.023085832595825, "logits/oppo_real": -2.9624710083007812, "logits/real": -2.9275739192962646, "logps/generated": -84.17431640625, "logps/oppo_gen": -66.7940902709961, "logps/oppo_real": -276.38616943359375, "logps/real": -282.4690856933594, "loss": 4.4138, "loss/gen": 6.213091850280762, "loss/real": -1.9391708374023438, "rewards/accuracies": 1.0, "rewards/generated": -17.380220413208008, "rewards/margins": 11.297311782836914, "rewards/real": -6.082908630371094, "step": 20 }, { "epoch": 0.11, "grad_norm": 122.03695169138321, "learning_rate": 2.3333333333333333e-07, "logits/generated": -2.785062313079834, "logits/oppo_generated": -2.763364315032959, "logits/oppo_real": -2.8705592155456543, "logits/real": -2.6673243045806885, "logps/generated": -97.63499450683594, "logps/oppo_gen": -77.94976043701172, "logps/oppo_real": -317.0445861816406, "logps/real": -323.0356750488281, "loss": 4.265, "loss/gen": 6.084134578704834, "loss/real": -1.9400889873504639, "rewards/accuracies": 1.0, "rewards/generated": -19.68524169921875, "rewards/margins": 13.694145202636719, "rewards/real": -5.991097450256348, "step": 21 }, { "epoch": 0.11, "grad_norm": 122.03695169138321, "learning_rate": 2.3333333333333333e-07, "logits/generated": -2.726562261581421, "logits/oppo_generated": -2.855116844177246, "logits/oppo_real": -2.865086078643799, "logits/real": -2.7427496910095215, "logps/generated": -82.3883285522461, "logps/oppo_gen": -62.901329040527344, "logps/oppo_real": -202.70956420898438, "logps/real": -216.28871154785156, "loss": 4.0626, "loss/gen": 6.088003635406494, "loss/real": -1.8642082214355469, "rewards/accuracies": 0.875, "rewards/generated": -19.48699951171875, "rewards/margins": 5.9078264236450195, "rewards/real": -13.57917308807373, "step": 22 }, { "epoch": 0.12, "grad_norm": 129.4518717878386, "learning_rate": 2.5e-07, "logits/generated": -2.3680832386016846, "logits/oppo_generated": -2.7028326988220215, "logits/oppo_real": -2.4785587787628174, "logits/real": -2.602602958679199, "logps/generated": -85.23381042480469, "logps/oppo_gen": -63.708274841308594, "logps/oppo_real": -408.9969482421875, "logps/real": -411.189697265625, "loss": 4.0392, "loss/gen": 5.969209671020508, "loss/real": -1.978072166442871, "rewards/accuracies": 1.0, "rewards/generated": -21.525535583496094, "rewards/margins": 19.332752227783203, "rewards/real": -2.192781925201416, "step": 23 }, { "epoch": 0.13, "grad_norm": 126.05579509313525, "learning_rate": 2.6666666666666667e-07, "logits/generated": -2.5577526092529297, "logits/oppo_generated": -2.848795175552368, "logits/oppo_real": -2.7064318656921387, "logits/real": -2.7243504524230957, "logps/generated": -106.70217895507812, "logps/oppo_gen": -80.21543884277344, "logps/oppo_real": -328.6651611328125, "logps/real": -338.0718078613281, "loss": 3.8879, "loss/gen": 5.678422451019287, "loss/real": -1.9059333801269531, "rewards/accuracies": 1.0, "rewards/generated": -26.486736297607422, "rewards/margins": 17.080078125, "rewards/real": -9.406658172607422, "step": 24 }, { "epoch": 0.13, "grad_norm": 126.74033031409142, "learning_rate": 2.833333333333333e-07, "logits/generated": -2.780156135559082, "logits/oppo_generated": -2.961332321166992, "logits/oppo_real": -2.937591075897217, "logits/real": -2.828620672225952, "logps/generated": -102.30010986328125, "logps/oppo_gen": -69.95628356933594, "logps/oppo_real": -426.2795104980469, "logps/real": -442.405029296875, "loss": 3.6222, "loss/gen": 5.359460830688477, "loss/real": -1.838744878768921, "rewards/accuracies": 1.0, "rewards/generated": -32.34381866455078, "rewards/margins": 16.218303680419922, "rewards/real": -16.12551498413086, "step": 25 }, { "epoch": 0.14, "grad_norm": 118.32002749429193, "learning_rate": 3e-07, "logits/generated": -2.8048362731933594, "logits/oppo_generated": -2.8632454872131348, "logits/oppo_real": -2.9817347526550293, "logits/real": -2.6736109256744385, "logps/generated": -134.10989379882812, "logps/oppo_gen": -92.99905395507812, "logps/oppo_real": -293.31121826171875, "logps/real": -314.2544250488281, "loss": 3.3844, "loss/gen": 4.9427008628845215, "loss/real": -1.7905679941177368, "rewards/accuracies": 1.0, "rewards/generated": -41.11084747314453, "rewards/margins": 20.167648315429688, "rewards/real": -20.943199157714844, "step": 26 }, { "epoch": 0.14, "grad_norm": 116.9737175007168, "learning_rate": 3.166666666666666e-07, "logits/generated": -2.8670060634613037, "logits/oppo_generated": -2.9514551162719727, "logits/oppo_real": -3.061511993408203, "logits/real": -2.7576708793640137, "logps/generated": -200.80691528320312, "logps/oppo_gen": -153.51296997070312, "logps/oppo_real": -477.2593994140625, "logps/real": -495.4168701171875, "loss": 3.1117, "loss/gen": 4.679584503173828, "loss/real": -1.818424940109253, "rewards/accuracies": 1.0, "rewards/generated": -47.29395294189453, "rewards/margins": 29.136451721191406, "rewards/real": -18.157499313354492, "step": 27 }, { "epoch": 0.15, "grad_norm": 114.94145928471589, "learning_rate": 3.333333333333333e-07, "logits/generated": -2.597921848297119, "logits/oppo_generated": -2.8073906898498535, "logits/oppo_real": -2.8212432861328125, "logits/real": -2.581136703491211, "logps/generated": -125.5290756225586, "logps/oppo_gen": -73.3681411743164, "logps/oppo_real": -366.8509826660156, "logps/real": -396.5523986816406, "loss": 2.8982, "loss/gen": 4.413166522979736, "loss/real": -1.7029860019683838, "rewards/accuracies": 1.0, "rewards/generated": -52.16094207763672, "rewards/margins": 22.459529876708984, "rewards/real": -29.701412200927734, "step": 28 }, { "epoch": 0.15, "grad_norm": 105.4175848115484, "learning_rate": 3.5e-07, "logits/generated": -2.5924081802368164, "logits/oppo_generated": -2.780890941619873, "logits/oppo_real": -2.8437681198120117, "logits/real": -2.549656629562378, "logps/generated": -120.14186096191406, "logps/oppo_gen": -64.063720703125, "logps/oppo_real": -288.26275634765625, "logps/real": -320.1248779296875, "loss": 2.627, "loss/gen": 4.243507385253906, "loss/real": -1.6813790798187256, "rewards/accuracies": 0.875, "rewards/generated": -56.078147888183594, "rewards/margins": 24.216047286987305, "rewards/real": -31.862098693847656, "step": 29 }, { "epoch": 0.16, "grad_norm": 103.82670827663998, "learning_rate": 3.666666666666666e-07, "logits/generated": -2.577294111251831, "logits/oppo_generated": -2.7607855796813965, "logits/oppo_real": -2.8285064697265625, "logits/real": -2.4904675483703613, "logps/generated": -202.02459716796875, "logps/oppo_gen": -138.3541259765625, "logps/oppo_real": -562.3087158203125, "logps/real": -597.827392578125, "loss": 2.3475, "loss/gen": 3.986393928527832, "loss/real": -1.6448135375976562, "rewards/accuracies": 0.75, "rewards/generated": -63.67047882080078, "rewards/margins": 28.15182113647461, "rewards/real": -35.51865768432617, "step": 30 }, { "epoch": 0.16, "grad_norm": 100.94574269750062, "learning_rate": 3.8333333333333335e-07, "logits/generated": -2.498058319091797, "logits/oppo_generated": -2.669119358062744, "logits/oppo_real": -2.8678927421569824, "logits/real": -2.371706008911133, "logps/generated": -145.69668579101562, "logps/oppo_gen": -70.41365051269531, "logps/oppo_real": -241.83944702148438, "logps/real": -286.0458984375, "loss": 2.0654, "loss/gen": 3.6289942264556885, "loss/real": -1.5579355955123901, "rewards/accuracies": 0.75, "rewards/generated": -75.28302001953125, "rewards/margins": 31.076583862304688, "rewards/real": -44.20643997192383, "step": 31 }, { "epoch": 0.17, "grad_norm": 98.157543369384, "learning_rate": 4e-07, "logits/generated": -2.1765198707580566, "logits/oppo_generated": -2.742915630340576, "logits/oppo_real": -2.5176854133605957, "logits/real": -2.3865435123443604, "logps/generated": -154.5072021484375, "logps/oppo_gen": -88.06977844238281, "logps/oppo_real": -250.35305786132812, "logps/real": -301.65313720703125, "loss": 1.9822, "loss/gen": 3.8242578506469727, "loss/real": -1.486999273300171, "rewards/accuracies": 0.875, "rewards/generated": -66.43741607666016, "rewards/margins": 15.137344360351562, "rewards/real": -51.300071716308594, "step": 32 }, { "epoch": 0.17, "grad_norm": 91.26540965776684, "learning_rate": 4.1666666666666667e-07, "logits/generated": -2.5197973251342773, "logits/oppo_generated": -2.7696216106414795, "logits/oppo_real": -2.8814268112182617, "logits/real": -2.416097640991211, "logps/generated": -142.49354553222656, "logps/oppo_gen": -71.01982116699219, "logps/oppo_real": -175.82728576660156, "logps/real": -231.18699645996094, "loss": 1.809, "loss/gen": 3.6547460556030273, "loss/real": -1.446402907371521, "rewards/accuracies": 0.875, "rewards/generated": -71.47373962402344, "rewards/margins": 16.114028930664062, "rewards/real": -55.359710693359375, "step": 33 }, { "epoch": 0.18, "grad_norm": 77.6623991529202, "learning_rate": 4.3333333333333335e-07, "logits/generated": -2.51528263092041, "logits/oppo_generated": -2.669196128845215, "logits/oppo_real": -3.0089945793151855, "logits/real": -2.259756565093994, "logps/generated": -147.68319702148438, "logps/oppo_gen": -57.30543518066406, "logps/oppo_real": -326.7079772949219, "logps/real": -385.50555419921875, "loss": 1.5094, "loss/gen": 3.075594902038574, "loss/real": -1.4120240211486816, "rewards/accuracies": 0.875, "rewards/generated": -90.37777709960938, "rewards/margins": 31.580162048339844, "rewards/real": -58.797607421875, "step": 34 }, { "epoch": 0.18, "grad_norm": 68.83081542698554, "learning_rate": 4.5e-07, "logits/generated": -2.14363431930542, "logits/oppo_generated": -2.504408359527588, "logits/oppo_real": -2.6590046882629395, "logits/real": -1.987848162651062, "logps/generated": -219.39852905273438, "logps/oppo_gen": -79.17024230957031, "logps/oppo_real": -203.21951293945312, "logps/real": -295.66925048828125, "loss": 1.3109, "loss/gen": 2.342175006866455, "loss/real": -1.075502634048462, "rewards/accuracies": 0.625, "rewards/generated": -140.228271484375, "rewards/margins": 47.77854919433594, "rewards/real": -92.4497299194336, "step": 35 }, { "epoch": 0.19, "grad_norm": 67.77129364404593, "learning_rate": 4.6666666666666666e-07, "logits/generated": -2.2981181144714355, "logits/oppo_generated": -2.8459527492523193, "logits/oppo_real": -2.959359645843506, "logits/real": -2.237192153930664, "logps/generated": -190.5108642578125, "logps/oppo_gen": -73.61311340332031, "logps/oppo_real": -330.1354675292969, "logps/real": -428.3790588378906, "loss": 1.2112, "loss/gen": 2.471275806427002, "loss/real": -1.017564058303833, "rewards/accuracies": 0.625, "rewards/generated": -116.89774322509766, "rewards/margins": 18.654144287109375, "rewards/real": -98.24359893798828, "step": 36 }, { "epoch": 0.19, "grad_norm": 55.4256192349542, "learning_rate": 4.833333333333333e-07, "logits/generated": -2.098021984100342, "logits/oppo_generated": -2.913597345352173, "logits/oppo_real": -2.7888224124908447, "logits/real": -2.2414162158966064, "logps/generated": -163.77145385742188, "logps/oppo_gen": -43.92766189575195, "logps/oppo_real": -201.2423858642578, "logps/real": -328.263916015625, "loss": 1.1323, "loss/gen": 2.4318315982818604, "loss/real": -0.729784369468689, "rewards/accuracies": 0.375, "rewards/generated": -119.84378814697266, "rewards/margins": -7.177766799926758, "rewards/real": -127.02156066894531, "step": 37 }, { "epoch": 0.2, "grad_norm": 43.58372700636816, "learning_rate": 5e-07, "logits/generated": -2.149717330932617, "logits/oppo_generated": -2.8873682022094727, "logits/oppo_real": -2.953073024749756, "logits/real": -2.14795184135437, "logps/generated": -204.06512451171875, "logps/oppo_gen": -63.10968017578125, "logps/oppo_real": -290.7437744140625, "logps/real": -395.0226745605469, "loss": 0.9722, "loss/gen": 1.9304319620132446, "loss/real": -0.9572109580039978, "rewards/accuracies": 0.625, "rewards/generated": -140.95542907714844, "rewards/margins": 36.676517486572266, "rewards/real": -104.2789077758789, "step": 38 }, { "epoch": 0.2, "grad_norm": 43.28064925124593, "learning_rate": 4.995519713261649e-07, "logits/generated": -2.0406126976013184, "logits/oppo_generated": -2.9155023097991943, "logits/oppo_real": -2.8444814682006836, "logits/real": -2.224266767501831, "logps/generated": -214.7823486328125, "logps/oppo_gen": -57.36619567871094, "logps/oppo_real": -319.1702880859375, "logps/real": -470.078369140625, "loss": 0.9536, "loss/gen": 1.6517560482025146, "loss/real": -0.49091899394989014, "rewards/accuracies": 0.75, "rewards/generated": -157.41615295410156, "rewards/margins": 6.508047103881836, "rewards/real": -150.90811157226562, "step": 39 }, { "epoch": 0.21, "grad_norm": 39.787796189107254, "learning_rate": 4.991039426523297e-07, "logits/generated": -2.045642614364624, "logits/oppo_generated": -2.817904233932495, "logits/oppo_real": -2.799046277999878, "logits/real": -2.124079704284668, "logps/generated": -240.5778350830078, "logps/oppo_gen": -62.37173080444336, "logps/oppo_real": -271.6532287597656, "logps/real": -416.31292724609375, "loss": 0.8435, "loss/gen": 1.3889131546020508, "loss/real": -0.5534029603004456, "rewards/accuracies": 0.875, "rewards/generated": -178.2061004638672, "rewards/margins": 33.546390533447266, "rewards/real": -144.65969848632812, "step": 40 }, { "epoch": 0.21, "grad_norm": 39.13177763030722, "learning_rate": 4.986559139784946e-07, "logits/generated": -2.117490291595459, "logits/oppo_generated": -2.833721160888672, "logits/oppo_real": -2.8886466026306152, "logits/real": -2.132415533065796, "logps/generated": -227.88870239257812, "logps/oppo_gen": -62.6229248046875, "logps/oppo_real": -254.74127197265625, "logps/real": -392.49224853515625, "loss": 0.8003, "loss/gen": 1.5646700859069824, "loss/real": -0.6224902868270874, "rewards/accuracies": 0.875, "rewards/generated": -165.26576232910156, "rewards/margins": 27.51479148864746, "rewards/real": -137.7509765625, "step": 41 }, { "epoch": 0.22, "grad_norm": 42.057857807077056, "learning_rate": 4.982078853046595e-07, "logits/generated": -2.0003559589385986, "logits/oppo_generated": -2.829643726348877, "logits/oppo_real": -2.9243969917297363, "logits/real": -2.0654373168945312, "logps/generated": -399.18463134765625, "logps/oppo_gen": -93.32018280029297, "logps/oppo_real": -296.91900634765625, "logps/real": -458.50836181640625, "loss": 0.7365, "loss/gen": 0.6350959539413452, "loss/real": -0.3841061592102051, "rewards/accuracies": 0.875, "rewards/generated": -305.86444091796875, "rewards/margins": 144.27505493164062, "rewards/real": -161.5894012451172, "step": 42 }, { "epoch": 0.22, "grad_norm": 45.58806028155152, "learning_rate": 4.977598566308244e-07, "logits/generated": -1.7739081382751465, "logits/oppo_generated": -2.5540552139282227, "logits/oppo_real": -2.583406448364258, "logits/real": -1.800355315208435, "logps/generated": -248.06814575195312, "logps/oppo_gen": -62.72918701171875, "logps/oppo_real": -156.4404296875, "logps/real": -300.55322265625, "loss": 0.6065, "loss/gen": 1.3522555828094482, "loss/real": -0.5588721036911011, "rewards/accuracies": 0.625, "rewards/generated": -185.33897399902344, "rewards/margins": 41.22618103027344, "rewards/real": -144.11279296875, "step": 43 }, { "epoch": 0.23, "grad_norm": 43.27926721541303, "learning_rate": 4.973118279569893e-07, "logits/generated": -1.790377140045166, "logits/oppo_generated": -2.83392333984375, "logits/oppo_real": -2.716136932373047, "logits/real": -1.9894132614135742, "logps/generated": -220.31033325195312, "logps/oppo_gen": -47.22636795043945, "logps/oppo_real": -154.70913696289062, "logps/real": -310.2235412597656, "loss": 0.5664, "loss/gen": 1.3508105278015137, "loss/real": -0.44485586881637573, "rewards/accuracies": 0.875, "rewards/generated": -173.08395385742188, "rewards/margins": 17.569549560546875, "rewards/real": -155.514404296875, "step": 44 }, { "epoch": 0.23, "grad_norm": 37.24892019460514, "learning_rate": 4.96863799283154e-07, "logits/generated": -2.4816246032714844, "logits/oppo_generated": -2.894857883453369, "logits/oppo_real": -3.2226767539978027, "logits/real": -2.1920199394226074, "logps/generated": -340.05987548828125, "logps/oppo_gen": -105.26202392578125, "logps/oppo_real": -493.02239990234375, "logps/real": -641.823974609375, "loss": 0.535, "loss/gen": 0.9430114030838013, "loss/real": -0.5119848847389221, "rewards/accuracies": 0.875, "rewards/generated": -234.7978515625, "rewards/margins": 85.996337890625, "rewards/real": -148.801513671875, "step": 45 }, { "epoch": 0.24, "grad_norm": 34.75340901949854, "learning_rate": 4.96415770609319e-07, "logits/generated": -1.8138926029205322, "logits/oppo_generated": -2.9227819442749023, "logits/oppo_real": -2.7699246406555176, "logits/real": -2.158921957015991, "logps/generated": -272.76275634765625, "logps/oppo_gen": -65.69087982177734, "logps/oppo_real": -185.58651733398438, "logps/real": -307.59954833984375, "loss": 0.472, "loss/gen": 1.0596184730529785, "loss/real": -0.7798694372177124, "rewards/accuracies": 1.0, "rewards/generated": -207.07186889648438, "rewards/margins": 85.05882263183594, "rewards/real": -122.01304626464844, "step": 46 }, { "epoch": 0.25, "grad_norm": 30.414269200427842, "learning_rate": 4.959677419354838e-07, "logits/generated": -2.14742374420166, "logits/oppo_generated": -2.7580342292785645, "logits/oppo_real": -2.948944330215454, "logits/real": -2.00089168548584, "logps/generated": -289.2415771484375, "logps/oppo_gen": -83.18161010742188, "logps/oppo_real": -301.14892578125, "logps/real": -444.14013671875, "loss": 0.4491, "loss/gen": 1.023041009902954, "loss/real": -0.5700880289077759, "rewards/accuracies": 0.875, "rewards/generated": -206.05996704101562, "rewards/margins": 63.068756103515625, "rewards/real": -142.9912109375, "step": 47 }, { "epoch": 0.25, "grad_norm": 34.17290379109778, "learning_rate": 4.955197132616487e-07, "logits/generated": -2.031747579574585, "logits/oppo_generated": -2.9268949031829834, "logits/oppo_real": -2.7888307571411133, "logits/real": -2.1852447986602783, "logps/generated": -290.5462341308594, "logps/oppo_gen": -73.60729217529297, "logps/oppo_real": -385.06817626953125, "logps/real": -524.4448852539062, "loss": 0.3841, "loss/gen": 0.9104207158088684, "loss/real": -0.6062330007553101, "rewards/accuracies": 0.875, "rewards/generated": -216.93894958496094, "rewards/margins": 77.56224060058594, "rewards/real": -139.376708984375, "step": 48 }, { "epoch": 0.26, "grad_norm": 29.687072365341113, "learning_rate": 4.950716845878136e-07, "logits/generated": -2.092398166656494, "logits/oppo_generated": -2.939999580383301, "logits/oppo_real": -2.972858428955078, "logits/real": -2.12141752243042, "logps/generated": -248.61563110351562, "logps/oppo_gen": -63.58892059326172, "logps/oppo_real": -292.31512451171875, "logps/real": -430.34051513671875, "loss": 0.3154, "loss/gen": 1.2107611894607544, "loss/real": -0.6197463274002075, "rewards/accuracies": 0.75, "rewards/generated": -185.02670288085938, "rewards/margins": 47.0013427734375, "rewards/real": -138.02536010742188, "step": 49 }, { "epoch": 0.26, "grad_norm": 34.51648439966765, "learning_rate": 4.946236559139784e-07, "logits/generated": -2.0396437644958496, "logits/oppo_generated": -2.9997801780700684, "logits/oppo_real": -3.0686826705932617, "logits/real": -2.159646511077881, "logps/generated": -287.21685791015625, "logps/oppo_gen": -74.3673095703125, "logps/oppo_real": -280.9033203125, "logps/real": -418.5960388183594, "loss": 0.2507, "loss/gen": 0.9513455629348755, "loss/real": -0.6230726838111877, "rewards/accuracies": 0.875, "rewards/generated": -212.8495330810547, "rewards/margins": 75.15680694580078, "rewards/real": -137.69273376464844, "step": 50 }, { "epoch": 0.27, "grad_norm": 34.51648439966765, "learning_rate": 4.946236559139784e-07, "logits/generated": -1.9025869369506836, "logits/oppo_generated": -2.9109854698181152, "logits/oppo_real": -2.8473780155181885, "logits/real": -2.126164197921753, "logps/generated": -262.5989990234375, "logps/oppo_gen": -68.43426513671875, "logps/oppo_real": -255.580810546875, "logps/real": -343.41925048828125, "loss": 0.2655, "loss/gen": 1.116389513015747, "loss/real": -1.1216154098510742, "rewards/accuracies": 1.0, "rewards/generated": -194.16476440429688, "rewards/margins": 106.3262939453125, "rewards/real": -87.83845520019531, "step": 51 }, { "epoch": 0.27, "grad_norm": 48.07575605554737, "learning_rate": 4.941756272401433e-07, "logits/generated": -1.9331986904144287, "logits/oppo_generated": -2.700697422027588, "logits/oppo_real": -2.7499947547912598, "logits/real": -1.9495654106140137, "logps/generated": -310.9825744628906, "logps/oppo_gen": -70.83297729492188, "logps/oppo_real": -279.63055419921875, "logps/real": -442.52166748046875, "loss": 0.2175, "loss/gen": 0.9699513912200928, "loss/real": -0.37108901143074036, "rewards/accuracies": 0.75, "rewards/generated": -240.14959716796875, "rewards/margins": 77.25849914550781, "rewards/real": -162.89111328125, "step": 52 }, { "epoch": 0.28, "grad_norm": 42.34599711519009, "learning_rate": 4.937275985663082e-07, "logits/generated": -2.342167854309082, "logits/oppo_generated": -2.8682141304016113, "logits/oppo_real": -3.2790589332580566, "logits/real": -2.054896116256714, "logps/generated": -309.45147705078125, "logps/oppo_gen": -81.61607360839844, "logps/oppo_real": -221.72312927246094, "logps/real": -368.44476318359375, "loss": 0.1235, "loss/gen": 0.8315409421920776, "loss/real": -0.532783567905426, "rewards/accuracies": 0.75, "rewards/generated": -227.83538818359375, "rewards/margins": 81.11372375488281, "rewards/real": -146.72164916992188, "step": 53 }, { "epoch": 0.28, "grad_norm": 25.769006325656385, "learning_rate": 4.932795698924731e-07, "logits/generated": -1.7544469833374023, "logits/oppo_generated": -2.7301034927368164, "logits/oppo_real": -2.709321975708008, "logits/real": -1.932969331741333, "logps/generated": -319.9211730957031, "logps/oppo_gen": -80.54927062988281, "logps/oppo_real": -310.97271728515625, "logps/real": -400.190673828125, "loss": 0.1326, "loss/gen": 0.9462176561355591, "loss/real": -1.1078202724456787, "rewards/accuracies": 1.0, "rewards/generated": -239.37188720703125, "rewards/margins": 150.15391540527344, "rewards/real": -89.21797943115234, "step": 54 }, { "epoch": 0.29, "grad_norm": 25.790073064114104, "learning_rate": 4.92831541218638e-07, "logits/generated": -1.9824426174163818, "logits/oppo_generated": -2.766693592071533, "logits/oppo_real": -2.925718307495117, "logits/real": -1.925614595413208, "logps/generated": -310.7677001953125, "logps/oppo_gen": -71.80207824707031, "logps/oppo_real": -235.78529357910156, "logps/real": -371.66021728515625, "loss": 0.0871, "loss/gen": 0.9566553831100464, "loss/real": -0.641250491142273, "rewards/accuracies": 1.0, "rewards/generated": -238.96560668945312, "rewards/margins": 103.09065246582031, "rewards/real": -135.8749542236328, "step": 55 }, { "epoch": 0.29, "grad_norm": 28.998453033227694, "learning_rate": 4.923835125448029e-07, "logits/generated": -1.9766473770141602, "logits/oppo_generated": -2.7838592529296875, "logits/oppo_real": -2.928971767425537, "logits/real": -1.9118558168411255, "logps/generated": -332.1834411621094, "logps/oppo_gen": -79.61759185791016, "logps/oppo_real": -232.3800811767578, "logps/real": -358.1265869140625, "loss": 0.073, "loss/gen": 0.9157878160476685, "loss/real": -0.7425349950790405, "rewards/accuracies": 0.875, "rewards/generated": -252.56585693359375, "rewards/margins": 126.81934356689453, "rewards/real": -125.74649810791016, "step": 56 }, { "epoch": 0.3, "grad_norm": 22.703764830970215, "learning_rate": 4.919354838709677e-07, "logits/generated": -1.9628534317016602, "logits/oppo_generated": -2.97432541847229, "logits/oppo_real": -2.8353166580200195, "logits/real": -2.0759224891662598, "logps/generated": -339.04296875, "logps/oppo_gen": -77.88506317138672, "logps/oppo_real": -276.94805908203125, "logps/real": -405.2261047363281, "loss": 0.0645, "loss/gen": 0.6549752950668335, "loss/real": -0.7172196507453918, "rewards/accuracies": 1.0, "rewards/generated": -261.15789794921875, "rewards/margins": 132.8798828125, "rewards/real": -128.2780303955078, "step": 57 }, { "epoch": 0.3, "grad_norm": 25.084795160459006, "learning_rate": 4.914874551971326e-07, "logits/generated": -1.7674864530563354, "logits/oppo_generated": -2.975834846496582, "logits/oppo_real": -2.730165719985962, "logits/real": -2.051088333129883, "logps/generated": -442.43011474609375, "logps/oppo_gen": -74.65117645263672, "logps/oppo_real": -194.0476531982422, "logps/real": -333.55999755859375, "loss": -0.0438, "loss/gen": 0.723225474357605, "loss/real": -0.6048767566680908, "rewards/accuracies": 0.875, "rewards/generated": -367.7789306640625, "rewards/margins": 228.2666015625, "rewards/real": -139.5123291015625, "step": 58 }, { "epoch": 0.31, "grad_norm": 26.174395867401362, "learning_rate": 4.910394265232975e-07, "logits/generated": -1.5783250331878662, "logits/oppo_generated": -2.842722177505493, "logits/oppo_real": -2.5714492797851562, "logits/real": -1.8060765266418457, "logps/generated": -329.1837158203125, "logps/oppo_gen": -54.848045349121094, "logps/oppo_real": -181.15826416015625, "logps/real": -288.05316162109375, "loss": -0.1401, "loss/gen": 0.6347978115081787, "loss/real": -0.9310512542724609, "rewards/accuracies": 1.0, "rewards/generated": -274.3356628417969, "rewards/margins": 167.4407958984375, "rewards/real": -106.89486694335938, "step": 59 }, { "epoch": 0.31, "grad_norm": 23.366499227158794, "learning_rate": 4.905913978494624e-07, "logits/generated": -1.848489761352539, "logits/oppo_generated": -2.70564603805542, "logits/oppo_real": -2.8469276428222656, "logits/real": -1.7071902751922607, "logps/generated": -299.8435974121094, "logps/oppo_gen": -58.444156646728516, "logps/oppo_real": -162.31228637695312, "logps/real": -293.0637512207031, "loss": -0.1455, "loss/gen": 0.7676070332527161, "loss/real": -0.6924855709075928, "rewards/accuracies": 1.0, "rewards/generated": -241.39942932128906, "rewards/margins": 110.64800262451172, "rewards/real": -130.75144958496094, "step": 60 }, { "epoch": 0.32, "grad_norm": 23.965715716104874, "learning_rate": 4.901433691756272e-07, "logits/generated": -2.067742347717285, "logits/oppo_generated": -2.7352287769317627, "logits/oppo_real": -3.062769889831543, "logits/real": -1.7817035913467407, "logps/generated": -279.8091125488281, "logps/oppo_gen": -58.14486312866211, "logps/oppo_real": -235.44610595703125, "logps/real": -335.95269775390625, "loss": -0.1697, "loss/gen": 0.8564858436584473, "loss/real": -0.9949342012405396, "rewards/accuracies": 1.0, "rewards/generated": -221.66424560546875, "rewards/margins": 121.15766143798828, "rewards/real": -100.506591796875, "step": 61 }, { "epoch": 0.32, "grad_norm": 22.71430331741021, "learning_rate": 4.896953405017921e-07, "logits/generated": -1.850874900817871, "logits/oppo_generated": -2.81040096282959, "logits/oppo_real": -2.7932534217834473, "logits/real": -1.9105334281921387, "logps/generated": -315.2555236816406, "logps/oppo_gen": -83.87113952636719, "logps/oppo_real": -450.6523132324219, "logps/real": -546.0828857421875, "loss": -0.1951, "loss/gen": 0.9174035787582397, "loss/real": -1.0456944704055786, "rewards/accuracies": 1.0, "rewards/generated": -231.38438415527344, "rewards/margins": 135.95382690429688, "rewards/real": -95.43055725097656, "step": 62 }, { "epoch": 0.33, "grad_norm": 24.960812793115707, "learning_rate": 4.89247311827957e-07, "logits/generated": -1.569549560546875, "logits/oppo_generated": -2.6111321449279785, "logits/oppo_real": -2.645878314971924, "logits/real": -1.4946357011795044, "logps/generated": -293.79986572265625, "logps/oppo_gen": -46.035884857177734, "logps/oppo_real": -112.12235260009766, "logps/real": -228.71566772460938, "loss": -0.2873, "loss/gen": 0.8553179502487183, "loss/real": -0.8340668082237244, "rewards/accuracies": 0.75, "rewards/generated": -247.76397705078125, "rewards/margins": 131.170654296875, "rewards/real": -116.59332275390625, "step": 63 }, { "epoch": 0.33, "grad_norm": 28.01833397213722, "learning_rate": 4.887992831541218e-07, "logits/generated": -1.4229357242584229, "logits/oppo_generated": -2.796260356903076, "logits/oppo_real": -2.7083005905151367, "logits/real": -1.875757098197937, "logps/generated": -314.1737365722656, "logps/oppo_gen": -87.55534362792969, "logps/oppo_real": -460.838623046875, "logps/real": -533.9716796875, "loss": -0.23, "loss/gen": 0.8128476142883301, "loss/real": -1.268669843673706, "rewards/accuracies": 1.0, "rewards/generated": -226.61837768554688, "rewards/margins": 153.48536682128906, "rewards/real": -73.13301849365234, "step": 64 }, { "epoch": 0.34, "grad_norm": 25.329698536099624, "learning_rate": 4.883512544802867e-07, "logits/generated": -1.8850572109222412, "logits/oppo_generated": -2.842528820037842, "logits/oppo_real": -3.0462865829467773, "logits/real": -1.8970359563827515, "logps/generated": -293.83636474609375, "logps/oppo_gen": -73.348388671875, "logps/oppo_real": -462.6760559082031, "logps/real": -521.508056640625, "loss": -0.2203, "loss/gen": 0.8699493408203125, "loss/real": -1.411679983139038, "rewards/accuracies": 1.0, "rewards/generated": -220.48794555664062, "rewards/margins": 161.65594482421875, "rewards/real": -58.832008361816406, "step": 65 }, { "epoch": 0.34, "grad_norm": 23.556966359497565, "learning_rate": 4.879032258064516e-07, "logits/generated": -1.7639917135238647, "logits/oppo_generated": -2.72807240486145, "logits/oppo_real": -3.0851736068725586, "logits/real": -1.7024996280670166, "logps/generated": -304.4716796875, "logps/oppo_gen": -62.214202880859375, "logps/oppo_real": -267.1644592285156, "logps/real": -361.82183837890625, "loss": -0.3428, "loss/gen": 0.7082281112670898, "loss/real": -1.0534261465072632, "rewards/accuracies": 1.0, "rewards/generated": -242.25747680664062, "rewards/margins": 147.60009765625, "rewards/real": -94.65738677978516, "step": 66 }, { "epoch": 0.35, "grad_norm": 26.432042922680157, "learning_rate": 4.874551971326164e-07, "logits/generated": -1.849442720413208, "logits/oppo_generated": -2.9244961738586426, "logits/oppo_real": -2.8555960655212402, "logits/real": -1.9950306415557861, "logps/generated": -432.98052978515625, "logps/oppo_gen": -76.25796508789062, "logps/oppo_real": -273.7462158203125, "logps/real": -344.53662109375, "loss": -0.3943, "loss/gen": 0.6396173238754272, "loss/real": -1.2920961380004883, "rewards/accuracies": 1.0, "rewards/generated": -356.7225341796875, "rewards/margins": 285.93218994140625, "rewards/real": -70.79039001464844, "step": 67 }, { "epoch": 0.36, "grad_norm": 25.96543112065059, "learning_rate": 4.870071684587813e-07, "logits/generated": -1.903306484222412, "logits/oppo_generated": -2.7076897621154785, "logits/oppo_real": -2.8026769161224365, "logits/real": -1.8154616355895996, "logps/generated": -306.98822021484375, "logps/oppo_gen": -92.40176391601562, "logps/oppo_real": -466.05743408203125, "logps/real": -509.5103759765625, "loss": -0.4503, "loss/gen": 1.017028570175171, "loss/real": -1.5654706954956055, "rewards/accuracies": 1.0, "rewards/generated": -214.58645629882812, "rewards/margins": 171.13351440429688, "rewards/real": -43.45293426513672, "step": 68 }, { "epoch": 0.36, "grad_norm": 24.761144113560263, "learning_rate": 4.865591397849462e-07, "logits/generated": -1.9471970796585083, "logits/oppo_generated": -2.91485595703125, "logits/oppo_real": -3.02333402633667, "logits/real": -1.9744383096694946, "logps/generated": -338.57110595703125, "logps/oppo_gen": -71.20426940917969, "logps/oppo_real": -316.012451171875, "logps/real": -401.06097412109375, "loss": -0.4181, "loss/gen": 0.5722190737724304, "loss/real": -1.1495147943496704, "rewards/accuracies": 1.0, "rewards/generated": -267.3668212890625, "rewards/margins": 182.3182830810547, "rewards/real": -85.04852294921875, "step": 69 }, { "epoch": 0.37, "grad_norm": 25.560786779768364, "learning_rate": 4.861111111111111e-07, "logits/generated": -1.8378194570541382, "logits/oppo_generated": -2.835737466812134, "logits/oppo_real": -2.825862407684326, "logits/real": -1.9024310111999512, "logps/generated": -395.5787658691406, "logps/oppo_gen": -66.29288482666016, "logps/oppo_real": -411.4686279296875, "logps/real": -450.5022277832031, "loss": -0.491, "loss/gen": 0.5810615420341492, "loss/real": -1.6096638441085815, "rewards/accuracies": 0.875, "rewards/generated": -329.285888671875, "rewards/margins": 290.2522888183594, "rewards/real": -39.03361511230469, "step": 70 }, { "epoch": 0.37, "grad_norm": 27.276715822734577, "learning_rate": 4.85663082437276e-07, "logits/generated": -1.2692300081253052, "logits/oppo_generated": -2.519876480102539, "logits/oppo_real": -2.3758904933929443, "logits/real": -1.504533052444458, "logps/generated": -309.310791015625, "logps/oppo_gen": -53.77077865600586, "logps/oppo_real": -232.32125854492188, "logps/real": -335.275390625, "loss": -0.4771, "loss/gen": 0.6719827651977539, "loss/real": -0.9704589247703552, "rewards/accuracies": 1.0, "rewards/generated": -255.53997802734375, "rewards/margins": 152.58587646484375, "rewards/real": -102.95411682128906, "step": 71 }, { "epoch": 0.38, "grad_norm": 31.6782193710368, "learning_rate": 4.852150537634409e-07, "logits/generated": -1.967892050743103, "logits/oppo_generated": -2.847916841506958, "logits/oppo_real": -3.119495391845703, "logits/real": -1.8703951835632324, "logps/generated": -302.1221618652344, "logps/oppo_gen": -61.632965087890625, "logps/oppo_real": -283.0968322753906, "logps/real": -316.48565673828125, "loss": -0.5487, "loss/gen": 0.792396605014801, "loss/real": -1.6661118268966675, "rewards/accuracies": 1.0, "rewards/generated": -240.48919677734375, "rewards/margins": 207.1003875732422, "rewards/real": -33.388816833496094, "step": 72 }, { "epoch": 0.38, "grad_norm": 28.101801413357496, "learning_rate": 4.847670250896057e-07, "logits/generated": -1.7287254333496094, "logits/oppo_generated": -2.817739963531494, "logits/oppo_real": -2.8102121353149414, "logits/real": -1.9246106147766113, "logps/generated": -348.30560302734375, "logps/oppo_gen": -84.71308135986328, "logps/oppo_real": -441.73095703125, "logps/real": -496.3192138671875, "loss": -0.5813, "loss/gen": 0.767849326133728, "loss/real": -1.454117774963379, "rewards/accuracies": 1.0, "rewards/generated": -263.5924987792969, "rewards/margins": 209.0042724609375, "rewards/real": -54.588233947753906, "step": 73 }, { "epoch": 0.39, "grad_norm": 38.45779443916408, "learning_rate": 4.843189964157705e-07, "logits/generated": -1.91994047164917, "logits/oppo_generated": -2.863375186920166, "logits/oppo_real": -2.9448790550231934, "logits/real": -1.9825626611709595, "logps/generated": -329.95269775390625, "logps/oppo_gen": -68.82878112792969, "logps/oppo_real": -370.04193115234375, "logps/real": -432.669189453125, "loss": -0.5133, "loss/gen": 0.658030092716217, "loss/real": -1.373727798461914, "rewards/accuracies": 1.0, "rewards/generated": -261.1239013671875, "rewards/margins": 198.49668884277344, "rewards/real": -62.62722396850586, "step": 74 }, { "epoch": 0.39, "grad_norm": 30.626852423248927, "learning_rate": 4.838709677419355e-07, "logits/generated": -1.6279140710830688, "logits/oppo_generated": -2.6728456020355225, "logits/oppo_real": -2.717794418334961, "logits/real": -1.7041985988616943, "logps/generated": -413.343017578125, "logps/oppo_gen": -96.53443908691406, "logps/oppo_real": -349.5957336425781, "logps/real": -410.91473388671875, "loss": -0.6334, "loss/gen": 0.40114909410476685, "loss/real": -1.3868098258972168, "rewards/accuracies": 1.0, "rewards/generated": -316.80859375, "rewards/margins": 255.48956298828125, "rewards/real": -61.31901550292969, "step": 75 }, { "epoch": 0.4, "grad_norm": 28.100519945214646, "learning_rate": 4.834229390681004e-07, "logits/generated": -1.8667380809783936, "logits/oppo_generated": -2.7693190574645996, "logits/oppo_real": -2.77004337310791, "logits/real": -1.9453085660934448, "logps/generated": -386.6142578125, "logps/oppo_gen": -76.46708679199219, "logps/oppo_real": -404.62432861328125, "logps/real": -449.008056640625, "loss": -0.7145, "loss/gen": 0.4149353802204132, "loss/real": -1.5561623573303223, "rewards/accuracies": 1.0, "rewards/generated": -310.1471862792969, "rewards/margins": 265.763427734375, "rewards/real": -44.383766174316406, "step": 76 }, { "epoch": 0.4, "grad_norm": 28.100519945214646, "learning_rate": 4.834229390681004e-07, "logits/generated": -1.9856168031692505, "logits/oppo_generated": -2.82077693939209, "logits/oppo_real": -2.9570560455322266, "logits/real": -1.8599485158920288, "logps/generated": -351.98699951171875, "logps/oppo_gen": -75.9544906616211, "logps/oppo_real": -272.2792663574219, "logps/real": -342.5171813964844, "loss": -0.7074, "loss/gen": 0.5127630233764648, "loss/real": -1.2976210117340088, "rewards/accuracies": 1.0, "rewards/generated": -276.03253173828125, "rewards/margins": 205.79464721679688, "rewards/real": -70.23788452148438, "step": 77 }, { "epoch": 0.41, "grad_norm": 28.89926119061318, "learning_rate": 4.829749103942652e-07, "logits/generated": -1.4996392726898193, "logits/oppo_generated": -2.77388858795166, "logits/oppo_real": -2.6705479621887207, "logits/real": -1.6910290718078613, "logps/generated": -306.9933166503906, "logps/oppo_gen": -44.69869613647461, "logps/oppo_real": -146.0938720703125, "logps/real": -247.02572631835938, "loss": -0.7324, "loss/gen": 0.6118674278259277, "loss/real": -0.990681529045105, "rewards/accuracies": 1.0, "rewards/generated": -262.29461669921875, "rewards/margins": 161.36277770996094, "rewards/real": -100.93185424804688, "step": 78 }, { "epoch": 0.41, "grad_norm": 31.06954538435709, "learning_rate": 4.825268817204301e-07, "logits/generated": -1.8742992877960205, "logits/oppo_generated": -2.823974847793579, "logits/oppo_real": -2.8963050842285156, "logits/real": -1.9178167581558228, "logps/generated": -351.6239929199219, "logps/oppo_gen": -77.19644165039062, "logps/oppo_real": -326.88067626953125, "logps/real": -378.48602294921875, "loss": -0.698, "loss/gen": 0.5047196745872498, "loss/real": -1.4839468002319336, "rewards/accuracies": 1.0, "rewards/generated": -274.42755126953125, "rewards/margins": 222.82223510742188, "rewards/real": -51.60532760620117, "step": 79 }, { "epoch": 0.42, "grad_norm": 43.30920802785477, "learning_rate": 4.820788530465949e-07, "logits/generated": -1.7955554723739624, "logits/oppo_generated": -2.8842811584472656, "logits/oppo_real": -2.962029457092285, "logits/real": -1.9502441883087158, "logps/generated": -325.9687194824219, "logps/oppo_gen": -54.408782958984375, "logps/oppo_real": -296.562255859375, "logps/real": -371.3411865234375, "loss": -0.6694, "loss/gen": 0.6074373126029968, "loss/real": -1.2522108554840088, "rewards/accuracies": 1.0, "rewards/generated": -271.5599365234375, "rewards/margins": 196.781005859375, "rewards/real": -74.77892303466797, "step": 80 }, { "epoch": 0.42, "grad_norm": 67.32772068514016, "learning_rate": 4.816308243727598e-07, "logits/generated": -1.738360047340393, "logits/oppo_generated": -2.689702272415161, "logits/oppo_real": -2.605893611907959, "logits/real": -1.9019668102264404, "logps/generated": -328.74029541015625, "logps/oppo_gen": -72.64117431640625, "logps/oppo_real": -543.97119140625, "logps/real": -576.9856567382812, "loss": -0.7155, "loss/gen": 0.8135882616043091, "loss/real": -1.6698557138442993, "rewards/accuracies": 0.875, "rewards/generated": -256.09912109375, "rewards/margins": 223.08465576171875, "rewards/real": -33.014434814453125, "step": 81 }, { "epoch": 0.43, "grad_norm": 45.615423331403214, "learning_rate": 4.811827956989247e-07, "logits/generated": -1.8823816776275635, "logits/oppo_generated": -2.6806015968322754, "logits/oppo_real": -2.851822853088379, "logits/real": -1.7596267461776733, "logps/generated": -390.17926025390625, "logps/oppo_gen": -60.20751953125, "logps/oppo_real": -257.2502136230469, "logps/real": -304.0963439941406, "loss": -0.8542, "loss/gen": 0.6062071919441223, "loss/real": -1.531538486480713, "rewards/accuracies": 1.0, "rewards/generated": -329.97174072265625, "rewards/margins": 283.1255798339844, "rewards/real": -46.84613800048828, "step": 82 }, { "epoch": 0.43, "grad_norm": 82.74539439128168, "learning_rate": 4.807347670250896e-07, "logits/generated": -2.0427887439727783, "logits/oppo_generated": -2.855865001678467, "logits/oppo_real": -3.10068416595459, "logits/real": -1.9768484830856323, "logps/generated": -352.57281494140625, "logps/oppo_gen": -83.05951690673828, "logps/oppo_real": -407.9609375, "logps/real": -482.402587890625, "loss": -0.7341, "loss/gen": 0.5735456347465515, "loss/real": -1.2555840015411377, "rewards/accuracies": 0.875, "rewards/generated": -269.5133056640625, "rewards/margins": 195.07171630859375, "rewards/real": -74.44159698486328, "step": 83 }, { "epoch": 0.44, "grad_norm": 59.27270587807996, "learning_rate": 4.802867383512544e-07, "logits/generated": -2.0733423233032227, "logits/oppo_generated": -2.820967197418213, "logits/oppo_real": -2.9550280570983887, "logits/real": -1.9983410835266113, "logps/generated": -397.8853454589844, "logps/oppo_gen": -113.86212921142578, "logps/oppo_real": -391.526123046875, "logps/real": -436.72381591796875, "loss": -0.9052, "loss/gen": 0.5662827491760254, "loss/real": -1.548022985458374, "rewards/accuracies": 1.0, "rewards/generated": -284.0232238769531, "rewards/margins": 238.82553100585938, "rewards/real": -45.19770050048828, "step": 84 }, { "epoch": 0.44, "grad_norm": 48.762087844323275, "learning_rate": 4.798387096774193e-07, "logits/generated": -1.8719103336334229, "logits/oppo_generated": -2.8029284477233887, "logits/oppo_real": -2.9302010536193848, "logits/real": -1.9117934703826904, "logps/generated": -291.1409912109375, "logps/oppo_gen": -59.32632827758789, "logps/oppo_real": -256.26556396484375, "logps/real": -281.3046875, "loss": -0.7887, "loss/gen": 0.9425208568572998, "loss/real": -1.7496089935302734, "rewards/accuracies": 1.0, "rewards/generated": -231.814697265625, "rewards/margins": 206.77557373046875, "rewards/real": -25.03911590576172, "step": 85 }, { "epoch": 0.45, "grad_norm": 54.32774527117797, "learning_rate": 4.793906810035842e-07, "logits/generated": -1.8661472797393799, "logits/oppo_generated": -2.915862560272217, "logits/oppo_real": -2.777987480163574, "logits/real": -2.0660204887390137, "logps/generated": -330.3395080566406, "logps/oppo_gen": -69.80546569824219, "logps/oppo_real": -230.58383178710938, "logps/real": -266.6177673339844, "loss": -0.93, "loss/gen": 0.6018867492675781, "loss/real": -1.6396608352661133, "rewards/accuracies": 1.0, "rewards/generated": -260.5340576171875, "rewards/margins": 224.50010681152344, "rewards/real": -36.03392791748047, "step": 86 }, { "epoch": 0.45, "grad_norm": 56.34434795855748, "learning_rate": 4.789426523297491e-07, "logits/generated": -1.8885971307754517, "logits/oppo_generated": -3.002845048904419, "logits/oppo_real": -2.9699549674987793, "logits/real": -2.183290719985962, "logps/generated": -343.339599609375, "logps/oppo_gen": -82.54539489746094, "logps/oppo_real": -261.07891845703125, "logps/real": -305.88140869140625, "loss": -0.9169, "loss/gen": 0.6482059955596924, "loss/real": -1.5519750118255615, "rewards/accuracies": 1.0, "rewards/generated": -260.794189453125, "rewards/margins": 215.99168395996094, "rewards/real": -44.80250930786133, "step": 87 }, { "epoch": 0.46, "grad_norm": 51.83929494119302, "learning_rate": 4.78494623655914e-07, "logits/generated": -1.9645639657974243, "logits/oppo_generated": -2.8191170692443848, "logits/oppo_real": -2.8346924781799316, "logits/real": -2.02170991897583, "logps/generated": -359.3294982910156, "logps/oppo_gen": -77.02418518066406, "logps/oppo_real": -365.96343994140625, "logps/real": -455.93804931640625, "loss": -0.7995, "loss/gen": 0.5177488327026367, "loss/real": -1.1002535820007324, "rewards/accuracies": 1.0, "rewards/generated": -282.3053283691406, "rewards/margins": 192.3306884765625, "rewards/real": -89.97463989257812, "step": 88 }, { "epoch": 0.46, "grad_norm": 68.54197932426156, "learning_rate": 4.780465949820789e-07, "logits/generated": -2.152953624725342, "logits/oppo_generated": -2.9769649505615234, "logits/oppo_real": -3.0434319972991943, "logits/real": -2.1738698482513428, "logps/generated": -345.0735778808594, "logps/oppo_gen": -78.12904357910156, "logps/oppo_real": -379.5708312988281, "logps/real": -461.0662536621094, "loss": -0.92, "loss/gen": 0.7530688047409058, "loss/real": -1.1850459575653076, "rewards/accuracies": 1.0, "rewards/generated": -266.94451904296875, "rewards/margins": 185.4491424560547, "rewards/real": -81.49540710449219, "step": 89 }, { "epoch": 0.47, "grad_norm": 50.14855129777554, "learning_rate": 4.775985663082437e-07, "logits/generated": -1.7497427463531494, "logits/oppo_generated": -2.8537919521331787, "logits/oppo_real": -2.717353105545044, "logits/real": -2.008902072906494, "logps/generated": -428.0735778808594, "logps/oppo_gen": -119.7020492553711, "logps/oppo_real": -358.0323791503906, "logps/real": -440.1455383300781, "loss": -1.0468, "loss/gen": 0.5207056999206543, "loss/real": -1.178868293762207, "rewards/accuracies": 1.0, "rewards/generated": -308.3714904785156, "rewards/margins": 226.25833129882812, "rewards/real": -82.11316680908203, "step": 90 }, { "epoch": 0.48, "grad_norm": 83.74393343233139, "learning_rate": 4.771505376344086e-07, "logits/generated": -1.6041526794433594, "logits/oppo_generated": -2.863269805908203, "logits/oppo_real": -2.6908156871795654, "logits/real": -1.9434775114059448, "logps/generated": -464.01544189453125, "logps/oppo_gen": -73.95469665527344, "logps/oppo_real": -308.79437255859375, "logps/real": -393.7951354980469, "loss": -0.8861, "loss/gen": 0.208269402384758, "loss/real": -1.1499923467636108, "rewards/accuracies": 1.0, "rewards/generated": -390.06072998046875, "rewards/margins": 305.0599365234375, "rewards/real": -85.00077819824219, "step": 91 }, { "epoch": 0.48, "grad_norm": 51.583056981707905, "learning_rate": 4.7670250896057344e-07, "logits/generated": -1.8397870063781738, "logits/oppo_generated": -2.8908724784851074, "logits/oppo_real": -2.8895483016967773, "logits/real": -1.9887313842773438, "logps/generated": -355.3204650878906, "logps/oppo_gen": -76.94686126708984, "logps/oppo_real": -271.02813720703125, "logps/real": -311.24884033203125, "loss": -1.0606, "loss/gen": 0.5185320377349854, "loss/real": -1.5977928638458252, "rewards/accuracies": 1.0, "rewards/generated": -278.37359619140625, "rewards/margins": 238.15289306640625, "rewards/real": -40.22071075439453, "step": 92 }, { "epoch": 0.49, "grad_norm": 84.3485346225336, "learning_rate": 4.762544802867383e-07, "logits/generated": -2.00917387008667, "logits/oppo_generated": -2.873152732849121, "logits/oppo_real": -2.9451375007629395, "logits/real": -2.055788993835449, "logps/generated": -423.2607421875, "logps/oppo_gen": -63.39752960205078, "logps/oppo_real": -189.16378784179688, "logps/real": -238.4603271484375, "loss": -0.9892, "loss/gen": 0.42659705877304077, "loss/real": -1.5070346593856812, "rewards/accuracies": 1.0, "rewards/generated": -359.86322021484375, "rewards/margins": 310.566650390625, "rewards/real": -49.296546936035156, "step": 93 }, { "epoch": 0.49, "grad_norm": 56.707858802814975, "learning_rate": 4.758064516129032e-07, "logits/generated": -1.7914602756500244, "logits/oppo_generated": -2.698634147644043, "logits/oppo_real": -2.8618617057800293, "logits/real": -1.7156788110733032, "logps/generated": -337.7628479003906, "logps/oppo_gen": -66.55247497558594, "logps/oppo_real": -330.7273254394531, "logps/real": -382.69952392578125, "loss": -1.1736, "loss/gen": 0.5937217473983765, "loss/real": -1.4802782535552979, "rewards/accuracies": 0.875, "rewards/generated": -271.21038818359375, "rewards/margins": 219.2382049560547, "rewards/real": -51.9721794128418, "step": 94 }, { "epoch": 0.5, "grad_norm": 63.84164674704906, "learning_rate": 4.753584229390681e-07, "logits/generated": -1.484118103981018, "logits/oppo_generated": -2.6399593353271484, "logits/oppo_real": -2.7412514686584473, "logits/real": -1.6595765352249146, "logps/generated": -318.4638366699219, "logps/oppo_gen": -71.26600646972656, "logps/oppo_real": -342.77703857421875, "logps/real": -332.076416015625, "loss": -1.0251, "loss/gen": 1.2006943225860596, "loss/real": -2.107006311416626, "rewards/accuracies": 1.0, "rewards/generated": -247.19781494140625, "rewards/margins": 257.8984680175781, "rewards/real": 10.700631141662598, "step": 95 }, { "epoch": 0.5, "grad_norm": 75.05080961213876, "learning_rate": 4.749103942652329e-07, "logits/generated": -1.6428945064544678, "logits/oppo_generated": -2.657951831817627, "logits/oppo_real": -2.851677656173706, "logits/real": -1.653546929359436, "logps/generated": -400.077880859375, "logps/oppo_gen": -85.37565612792969, "logps/oppo_real": -318.65338134765625, "logps/real": -342.5865173339844, "loss": -1.161, "loss/gen": 0.37989306449890137, "loss/real": -1.7606685161590576, "rewards/accuracies": 1.0, "rewards/generated": -314.70220947265625, "rewards/margins": 290.7690734863281, "rewards/real": -23.933155059814453, "step": 96 }, { "epoch": 0.51, "grad_norm": 46.01563826400488, "learning_rate": 4.7446236559139785e-07, "logits/generated": -1.8539488315582275, "logits/oppo_generated": -2.7544326782226562, "logits/oppo_real": -2.9937453269958496, "logits/real": -1.6929676532745361, "logps/generated": -571.8555908203125, "logps/oppo_gen": -91.8690185546875, "logps/oppo_real": -138.0150604248047, "logps/real": -236.94998168945312, "loss": -1.303, "loss/gen": 0.19529122114181519, "loss/real": -1.010650634765625, "rewards/accuracies": 1.0, "rewards/generated": -479.98651123046875, "rewards/margins": 381.0516357421875, "rewards/real": -98.93492889404297, "step": 97 }, { "epoch": 0.51, "grad_norm": 108.21823112061571, "learning_rate": 4.740143369175627e-07, "logits/generated": -1.5786761045455933, "logits/oppo_generated": -2.937568187713623, "logits/oppo_real": -2.910910129547119, "logits/real": -2.0116238594055176, "logps/generated": -319.71612548828125, "logps/oppo_gen": -72.81363677978516, "logps/oppo_real": -349.1295166015625, "logps/real": -343.4236145019531, "loss": -1.2469, "loss/gen": 1.134260654449463, "loss/real": -2.0570592880249023, "rewards/accuracies": 1.0, "rewards/generated": -246.9025115966797, "rewards/margins": 252.60841369628906, "rewards/real": 5.705905914306641, "step": 98 }, { "epoch": 0.52, "grad_norm": 61.41563819772921, "learning_rate": 4.7356630824372756e-07, "logits/generated": -1.6027933359146118, "logits/oppo_generated": -2.8456101417541504, "logits/oppo_real": -2.7031779289245605, "logits/real": -1.860973596572876, "logps/generated": -398.1639404296875, "logps/oppo_gen": -74.38111114501953, "logps/oppo_real": -210.15036010742188, "logps/real": -244.78244018554688, "loss": -1.2393, "loss/gen": 0.38152068853378296, "loss/real": -1.653679370880127, "rewards/accuracies": 1.0, "rewards/generated": -323.7828369140625, "rewards/margins": 289.1507568359375, "rewards/real": -34.63207244873047, "step": 99 }, { "epoch": 0.52, "grad_norm": 155.75063954523588, "learning_rate": 4.731182795698925e-07, "logits/generated": -1.7713713645935059, "logits/oppo_generated": -2.8624844551086426, "logits/oppo_real": -3.1369876861572266, "logits/real": -1.7838329076766968, "logps/generated": -434.6895446777344, "logps/oppo_gen": -89.36515808105469, "logps/oppo_real": -372.01629638671875, "logps/real": -403.449462890625, "loss": -1.0078, "loss/gen": 0.25918668508529663, "loss/real": -1.6856684684753418, "rewards/accuracies": 1.0, "rewards/generated": -345.32440185546875, "rewards/margins": 313.8912353515625, "rewards/real": -31.433147430419922, "step": 100 }, { "epoch": 0.53, "grad_norm": 110.75212945892959, "learning_rate": 4.726702508960573e-07, "logits/generated": -1.7238816022872925, "logits/oppo_generated": -2.851134777069092, "logits/oppo_real": -2.806102752685547, "logits/real": -1.8398982286453247, "logps/generated": -455.3783874511719, "logps/oppo_gen": -102.20521545410156, "logps/oppo_real": -303.71771240234375, "logps/real": -384.83721923828125, "loss": -1.2778, "loss/gen": 0.28883445262908936, "loss/real": -1.1888045072555542, "rewards/accuracies": 1.0, "rewards/generated": -353.17315673828125, "rewards/margins": 272.05364990234375, "rewards/real": -81.11953735351562, "step": 101 }, { "epoch": 0.53, "grad_norm": 208.35696532633605, "learning_rate": 4.722222222222222e-07, "logits/generated": -1.8423570394515991, "logits/oppo_generated": -2.7869691848754883, "logits/oppo_real": -2.934145212173462, "logits/real": -1.8845537900924683, "logps/generated": -446.48486328125, "logps/oppo_gen": -118.00005340576172, "logps/oppo_real": -354.1058654785156, "logps/real": -363.6239013671875, "loss": -0.9829, "loss/gen": 0.3972959518432617, "loss/real": -1.904819130897522, "rewards/accuracies": 1.0, "rewards/generated": -328.4848327636719, "rewards/margins": 318.96673583984375, "rewards/real": -9.518078804016113, "step": 102 }, { "epoch": 0.54, "grad_norm": 172.94304517672862, "learning_rate": 4.717741935483871e-07, "logits/generated": -1.7597293853759766, "logits/oppo_generated": -2.940918445587158, "logits/oppo_real": -2.8966355323791504, "logits/real": -2.1739630699157715, "logps/generated": -335.61669921875, "logps/oppo_gen": -59.739017486572266, "logps/oppo_real": -344.7768249511719, "logps/real": -356.95220947265625, "loss": -1.2174, "loss/gen": 0.9002517461776733, "loss/real": -1.8782463073730469, "rewards/accuracies": 1.0, "rewards/generated": -275.877685546875, "rewards/margins": 263.70233154296875, "rewards/real": -12.175359725952148, "step": 103 }, { "epoch": 0.54, "grad_norm": 132.76997113193556, "learning_rate": 4.7132616487455197e-07, "logits/generated": -1.8769468069076538, "logits/oppo_generated": -2.7915775775909424, "logits/oppo_real": -3.1147103309631348, "logits/real": -1.7647349834442139, "logps/generated": -427.8299255371094, "logps/oppo_gen": -102.62004089355469, "logps/oppo_real": -260.40576171875, "logps/real": -333.41436767578125, "loss": -1.182, "loss/gen": 0.3168797492980957, "loss/real": -1.269913911819458, "rewards/accuracies": 1.0, "rewards/generated": -325.20989990234375, "rewards/margins": 252.20127868652344, "rewards/real": -73.00860595703125, "step": 104 }, { "epoch": 0.55, "grad_norm": 63.105612740666736, "learning_rate": 4.708781362007168e-07, "logits/generated": -1.6452438831329346, "logits/oppo_generated": -2.487020492553711, "logits/oppo_real": -2.6723856925964355, "logits/real": -1.5103099346160889, "logps/generated": -434.71832275390625, "logps/oppo_gen": -165.93902587890625, "logps/oppo_real": -273.2274169921875, "logps/real": -338.7171630859375, "loss": -1.2914, "loss/gen": 1.2655643224716187, "loss/real": -1.3451025485992432, "rewards/accuracies": 0.875, "rewards/generated": -268.7793273925781, "rewards/margins": 203.28956604003906, "rewards/real": -65.48976135253906, "step": 105 }, { "epoch": 0.55, "grad_norm": 108.93035063315688, "learning_rate": 4.7043010752688173e-07, "logits/generated": -1.6793558597564697, "logits/oppo_generated": -2.73710560798645, "logits/oppo_real": -2.767047166824341, "logits/real": -1.719926357269287, "logps/generated": -647.4390258789062, "logps/oppo_gen": -92.0302963256836, "logps/oppo_real": -215.4584503173828, "logps/real": -281.78326416015625, "loss": -1.2024, "loss/gen": 0.2647426724433899, "loss/real": -1.336751937866211, "rewards/accuracies": 0.875, "rewards/generated": -555.40869140625, "rewards/margins": 489.08392333984375, "rewards/real": -66.32481384277344, "step": 106 }, { "epoch": 0.56, "grad_norm": 59.14760526838238, "learning_rate": 4.6998207885304656e-07, "logits/generated": -1.0961192846298218, "logits/oppo_generated": -2.6469738483428955, "logits/oppo_real": -2.294445037841797, "logits/real": -1.6948471069335938, "logps/generated": -425.1643981933594, "logps/oppo_gen": -108.79867553710938, "logps/oppo_real": -322.5262756347656, "logps/real": -321.787841796875, "loss": -1.324, "loss/gen": 0.8741945028305054, "loss/real": -2.0073840618133545, "rewards/accuracies": 1.0, "rewards/generated": -316.36572265625, "rewards/margins": 317.1040954589844, "rewards/real": 0.7384042739868164, "step": 107 }, { "epoch": 0.56, "grad_norm": 168.3731266135573, "learning_rate": 4.6953405017921144e-07, "logits/generated": -1.9429898262023926, "logits/oppo_generated": -2.5572714805603027, "logits/oppo_real": -2.9609758853912354, "logits/real": -1.4800224304199219, "logps/generated": -361.43359375, "logps/oppo_gen": -79.15040588378906, "logps/oppo_real": -370.16033935546875, "logps/real": -355.4413757324219, "loss": -1.2951, "loss/gen": 0.8757161498069763, "loss/real": -2.147189140319824, "rewards/accuracies": 1.0, "rewards/generated": -282.2831726074219, "rewards/margins": 297.0021057128906, "rewards/real": 14.718932151794434, "step": 108 }, { "epoch": 0.57, "grad_norm": 85.13344591452439, "learning_rate": 4.690860215053763e-07, "logits/generated": -1.8185949325561523, "logits/oppo_generated": -2.818962335586548, "logits/oppo_real": -2.974072217941284, "logits/real": -1.8212263584136963, "logps/generated": -416.0355224609375, "logps/oppo_gen": -87.5977783203125, "logps/oppo_real": -245.32896423339844, "logps/real": -231.43038940429688, "loss": -1.3389, "loss/gen": 0.3807252049446106, "loss/real": -2.1389856338500977, "rewards/accuracies": 1.0, "rewards/generated": -328.437744140625, "rewards/margins": 342.3363037109375, "rewards/real": 13.898568153381348, "step": 109 }, { "epoch": 0.57, "grad_norm": 83.88629301769151, "learning_rate": 4.686379928315412e-07, "logits/generated": -1.616201400756836, "logits/oppo_generated": -2.633566379547119, "logits/oppo_real": -2.6259918212890625, "logits/real": -1.8099052906036377, "logps/generated": -367.63824462890625, "logps/oppo_gen": -84.75750732421875, "logps/oppo_real": -315.4161376953125, "logps/real": -310.99310302734375, "loss": -1.3142, "loss/gen": 1.0110325813293457, "loss/real": -2.0442302227020264, "rewards/accuracies": 1.0, "rewards/generated": -282.8807373046875, "rewards/margins": 287.30377197265625, "rewards/real": 4.4230194091796875, "step": 110 }, { "epoch": 0.58, "grad_norm": 105.49686199420815, "learning_rate": 4.681899641577061e-07, "logits/generated": -1.8293168544769287, "logits/oppo_generated": -2.7601919174194336, "logits/oppo_real": -2.861198902130127, "logits/real": -1.8698251247406006, "logps/generated": -464.19207763671875, "logps/oppo_gen": -63.106407165527344, "logps/oppo_real": -254.43199157714844, "logps/real": -266.5618591308594, "loss": -1.2892, "loss/gen": 0.18153703212738037, "loss/real": -1.8787013292312622, "rewards/accuracies": 1.0, "rewards/generated": -401.08563232421875, "rewards/margins": 388.95574951171875, "rewards/real": -12.129861831665039, "step": 111 }, { "epoch": 0.58, "grad_norm": 122.08849089646002, "learning_rate": 4.677419354838709e-07, "logits/generated": -1.6834774017333984, "logits/oppo_generated": -2.9130988121032715, "logits/oppo_real": -2.7563557624816895, "logits/real": -2.015559673309326, "logps/generated": -434.40545654296875, "logps/oppo_gen": -71.0981216430664, "logps/oppo_real": -282.122314453125, "logps/real": -366.6064453125, "loss": -1.2773, "loss/gen": 0.20630814135074615, "loss/real": -1.1551584005355835, "rewards/accuracies": 1.0, "rewards/generated": -363.30731201171875, "rewards/margins": 278.8231506347656, "rewards/real": -84.48416137695312, "step": 112 }, { "epoch": 0.59, "grad_norm": 68.69515212721528, "learning_rate": 4.6729390681003585e-07, "logits/generated": -1.648697853088379, "logits/oppo_generated": -2.9668259620666504, "logits/oppo_real": -2.745316505432129, "logits/real": -2.1867191791534424, "logps/generated": -530.75732421875, "logps/oppo_gen": -77.98722076416016, "logps/oppo_real": -298.9158020019531, "logps/real": -301.756103515625, "loss": -1.4792, "loss/gen": 0.20026695728302002, "loss/real": -1.9715969562530518, "rewards/accuracies": 1.0, "rewards/generated": -452.77008056640625, "rewards/margins": 449.92974853515625, "rewards/real": -2.8403053283691406, "step": 113 }, { "epoch": 0.6, "grad_norm": 57.901015430565856, "learning_rate": 4.668458781362007e-07, "logits/generated": -1.9489855766296387, "logits/oppo_generated": -2.750535726547241, "logits/oppo_real": -3.053309440612793, "logits/real": -1.809377670288086, "logps/generated": -428.41241455078125, "logps/oppo_gen": -60.58064270019531, "logps/oppo_real": -289.3247985839844, "logps/real": -279.5442199707031, "loss": -1.3882, "loss/gen": 0.2120557278394699, "loss/real": -2.097805976867676, "rewards/accuracies": 1.0, "rewards/generated": -367.831787109375, "rewards/margins": 377.6123962402344, "rewards/real": 9.780599594116211, "step": 114 }, { "epoch": 0.6, "grad_norm": 74.52388262853225, "learning_rate": 4.6639784946236556e-07, "logits/generated": -2.027777910232544, "logits/oppo_generated": -2.857908248901367, "logits/oppo_real": -2.9202375411987305, "logits/real": -2.0229392051696777, "logps/generated": -437.67413330078125, "logps/oppo_gen": -151.37307739257812, "logps/oppo_real": -296.711181640625, "logps/real": -294.72747802734375, "loss": -1.2855, "loss/gen": 1.1820727586746216, "loss/real": -2.019836902618408, "rewards/accuracies": 1.0, "rewards/generated": -286.301025390625, "rewards/margins": 288.28472900390625, "rewards/real": 1.983699083328247, "step": 115 }, { "epoch": 0.61, "grad_norm": 117.49908576771992, "learning_rate": 4.6594982078853044e-07, "logits/generated": -1.4556838274002075, "logits/oppo_generated": -2.790827751159668, "logits/oppo_real": -2.5702054500579834, "logits/real": -1.9723587036132812, "logps/generated": -398.6170959472656, "logps/oppo_gen": -72.09220123291016, "logps/oppo_real": -381.5404357910156, "logps/real": -404.51959228515625, "loss": -1.3209, "loss/gen": 0.30479684472084045, "loss/real": -1.7702081203460693, "rewards/accuracies": 1.0, "rewards/generated": -326.5248718261719, "rewards/margins": 303.54571533203125, "rewards/real": -22.979171752929688, "step": 116 }, { "epoch": 0.61, "grad_norm": 65.84271178583474, "learning_rate": 4.655017921146953e-07, "logits/generated": -1.4536468982696533, "logits/oppo_generated": -2.7350287437438965, "logits/oppo_real": -2.7642884254455566, "logits/real": -1.7534418106079102, "logps/generated": -564.8858642578125, "logps/oppo_gen": -75.31367492675781, "logps/oppo_real": -265.2264709472656, "logps/real": -327.7047424316406, "loss": -1.3234, "loss/gen": 0.1534099280834198, "loss/real": -1.3752171993255615, "rewards/accuracies": 1.0, "rewards/generated": -489.57220458984375, "rewards/margins": 427.0939025878906, "rewards/real": -62.4782829284668, "step": 117 }, { "epoch": 0.62, "grad_norm": 61.410231661624394, "learning_rate": 4.6505376344086015e-07, "logits/generated": -1.8642635345458984, "logits/oppo_generated": -2.9040493965148926, "logits/oppo_real": -3.1028363704681396, "logits/real": -1.919235110282898, "logps/generated": -544.6783447265625, "logps/oppo_gen": -125.20480346679688, "logps/oppo_real": -288.18572998046875, "logps/real": -292.4162902832031, "loss": -1.5366, "loss/gen": 0.14077386260032654, "loss/real": -1.9576942920684814, "rewards/accuracies": 1.0, "rewards/generated": -419.47357177734375, "rewards/margins": 415.24298095703125, "rewards/real": -4.230566024780273, "step": 118 }, { "epoch": 0.62, "grad_norm": 81.7004759682433, "learning_rate": 4.646057347670251e-07, "logits/generated": -1.400865077972412, "logits/oppo_generated": -2.81662917137146, "logits/oppo_real": -2.771235227584839, "logits/real": -1.9147589206695557, "logps/generated": -483.5452880859375, "logps/oppo_gen": -75.8317642211914, "logps/oppo_real": -309.955078125, "logps/real": -345.3260498046875, "loss": -1.477, "loss/gen": 0.14227358996868134, "loss/real": -1.6462900638580322, "rewards/accuracies": 1.0, "rewards/generated": -407.7135009765625, "rewards/margins": 372.342529296875, "rewards/real": -35.37098693847656, "step": 119 }, { "epoch": 0.63, "grad_norm": 76.67643945298647, "learning_rate": 4.641577060931899e-07, "logits/generated": -1.515089511871338, "logits/oppo_generated": -2.6174123287200928, "logits/oppo_real": -2.623584747314453, "logits/real": -1.833693265914917, "logps/generated": -522.1652221679688, "logps/oppo_gen": -89.05635070800781, "logps/oppo_real": -326.333251953125, "logps/real": -330.11474609375, "loss": -1.4459, "loss/gen": 0.16859720647335052, "loss/real": -1.9621846675872803, "rewards/accuracies": 1.0, "rewards/generated": -433.10888671875, "rewards/margins": 429.32733154296875, "rewards/real": -3.7815260887145996, "step": 120 }, { "epoch": 0.63, "grad_norm": 82.81281249941038, "learning_rate": 4.637096774193548e-07, "logits/generated": -1.8157904148101807, "logits/oppo_generated": -3.0457491874694824, "logits/oppo_real": -2.8690385818481445, "logits/real": -2.337892532348633, "logps/generated": -435.04327392578125, "logps/oppo_gen": -73.73023986816406, "logps/oppo_real": -327.18359375, "logps/real": -325.4014892578125, "loss": -1.5047, "loss/gen": 0.7044680118560791, "loss/real": -2.0178208351135254, "rewards/accuracies": 0.875, "rewards/generated": -361.31298828125, "rewards/margins": 363.0950927734375, "rewards/real": 1.7820682525634766, "step": 121 }, { "epoch": 0.64, "grad_norm": 125.18439524357659, "learning_rate": 4.6326164874551973e-07, "logits/generated": -1.8496112823486328, "logits/oppo_generated": -2.835294246673584, "logits/oppo_real": -2.8998498916625977, "logits/real": -2.04846453666687, "logps/generated": -464.38885498046875, "logps/oppo_gen": -70.26353454589844, "logps/oppo_real": -340.30975341796875, "logps/real": -354.45721435546875, "loss": -1.4469, "loss/gen": 0.40815508365631104, "loss/real": -1.858525276184082, "rewards/accuracies": 1.0, "rewards/generated": -394.12530517578125, "rewards/margins": 379.97784423828125, "rewards/real": -14.14747428894043, "step": 122 }, { "epoch": 0.64, "grad_norm": 49.26206179629386, "learning_rate": 4.6281362007168456e-07, "logits/generated": -1.7525596618652344, "logits/oppo_generated": -2.7462942600250244, "logits/oppo_real": -2.756624221801758, "logits/real": -1.8544926643371582, "logps/generated": -438.41241455078125, "logps/oppo_gen": -43.12284851074219, "logps/oppo_real": -88.672607421875, "logps/real": -175.98345947265625, "loss": -1.4557, "loss/gen": 0.6103305816650391, "loss/real": -1.1268913745880127, "rewards/accuracies": 0.75, "rewards/generated": -395.28961181640625, "rewards/margins": 307.978759765625, "rewards/real": -87.31085968017578, "step": 123 }, { "epoch": 0.65, "grad_norm": 61.709413850825875, "learning_rate": 4.6236559139784944e-07, "logits/generated": -1.9703348875045776, "logits/oppo_generated": -3.2303848266601562, "logits/oppo_real": -3.089721918106079, "logits/real": -2.517977714538574, "logps/generated": -519.572021484375, "logps/oppo_gen": -85.11558532714844, "logps/oppo_real": -363.27288818359375, "logps/real": -372.19915771484375, "loss": -1.6645, "loss/gen": 0.11303215473890305, "loss/real": -1.9107370376586914, "rewards/accuracies": 1.0, "rewards/generated": -434.4564208984375, "rewards/margins": 425.5301513671875, "rewards/real": -8.92629623413086, "step": 124 }, { "epoch": 0.65, "grad_norm": 88.81122448063645, "learning_rate": 4.619175627240143e-07, "logits/generated": -1.938302993774414, "logits/oppo_generated": -2.8577804565429688, "logits/oppo_real": -2.9372658729553223, "logits/real": -2.0496668815612793, "logps/generated": -748.061767578125, "logps/oppo_gen": -77.513916015625, "logps/oppo_real": -263.41583251953125, "logps/real": -329.33160400390625, "loss": -1.5934, "loss/gen": 0.13084131479263306, "loss/real": -1.3408421277999878, "rewards/accuracies": 1.0, "rewards/generated": -670.5478515625, "rewards/margins": 604.6320190429688, "rewards/real": -65.91577911376953, "step": 125 }, { "epoch": 0.66, "grad_norm": 83.68007225252317, "learning_rate": 4.614695340501792e-07, "logits/generated": -1.7787394523620605, "logits/oppo_generated": -2.7984981536865234, "logits/oppo_real": -2.694584369659424, "logits/real": -2.0291075706481934, "logps/generated": -396.68927001953125, "logps/oppo_gen": -49.8719596862793, "logps/oppo_real": -201.35671997070312, "logps/real": -242.06234741210938, "loss": -1.5737, "loss/gen": 0.6225491762161255, "loss/real": -1.5929436683654785, "rewards/accuracies": 1.0, "rewards/generated": -346.81732177734375, "rewards/margins": 306.1116943359375, "rewards/real": -40.70562744140625, "step": 126 }, { "epoch": 0.66, "grad_norm": 66.71267170790156, "learning_rate": 4.6102150537634403e-07, "logits/generated": -2.229249954223633, "logits/oppo_generated": -2.8614678382873535, "logits/oppo_real": -3.104336738586426, "logits/real": -2.1488466262817383, "logps/generated": -631.5731201171875, "logps/oppo_gen": -65.24995422363281, "logps/oppo_real": -279.1671142578125, "logps/real": -288.8418273925781, "loss": -1.5765, "loss/gen": 0.17149776220321655, "loss/real": -1.9032527208328247, "rewards/accuracies": 1.0, "rewards/generated": -566.3231201171875, "rewards/margins": 556.6484375, "rewards/real": -9.67473030090332, "step": 127 }, { "epoch": 0.67, "grad_norm": 169.06562497111884, "learning_rate": 4.6057347670250897e-07, "logits/generated": -1.8923313617706299, "logits/oppo_generated": -2.743807554244995, "logits/oppo_real": -2.8210201263427734, "logits/real": -2.0257139205932617, "logps/generated": -499.35198974609375, "logps/oppo_gen": -89.97515869140625, "logps/oppo_real": -332.08160400390625, "logps/real": -333.4979248046875, "loss": -1.5969, "loss/gen": 0.5354217886924744, "loss/real": -1.9858367443084717, "rewards/accuracies": 1.0, "rewards/generated": -409.3768615722656, "rewards/margins": 407.96051025390625, "rewards/real": -1.4163341522216797, "step": 128 }, { "epoch": 0.67, "grad_norm": 46.82037963556842, "learning_rate": 4.601254480286738e-07, "logits/generated": -1.4846677780151367, "logits/oppo_generated": -2.88552188873291, "logits/oppo_real": -2.462414264678955, "logits/real": -2.096205472946167, "logps/generated": -536.2479248046875, "logps/oppo_gen": -83.80229949951172, "logps/oppo_real": -202.01084899902344, "logps/real": -235.54244995117188, "loss": -1.6622, "loss/gen": 0.28287458419799805, "loss/real": -1.6646840572357178, "rewards/accuracies": 0.875, "rewards/generated": -452.4456481933594, "rewards/margins": 418.9140625, "rewards/real": -33.531593322753906, "step": 129 }, { "epoch": 0.68, "grad_norm": 62.38898539042669, "learning_rate": 4.596774193548387e-07, "logits/generated": -1.2318034172058105, "logits/oppo_generated": -2.284450054168701, "logits/oppo_real": -2.3912582397460938, "logits/real": -1.4682029485702515, "logps/generated": -635.2818603515625, "logps/oppo_gen": -65.28082275390625, "logps/oppo_real": -298.7229919433594, "logps/real": -261.49755859375, "loss": -1.6128, "loss/gen": 0.3564888834953308, "loss/real": -2.3722541332244873, "rewards/accuracies": 1.0, "rewards/generated": -570.0010986328125, "rewards/margins": 607.2265625, "rewards/real": 37.22542953491211, "step": 130 }, { "epoch": 0.68, "grad_norm": 125.28287523889688, "learning_rate": 4.5922939068100356e-07, "logits/generated": -1.7798149585723877, "logits/oppo_generated": -2.862884044647217, "logits/oppo_real": -3.036806344985962, "logits/real": -1.983008861541748, "logps/generated": -748.7784423828125, "logps/oppo_gen": -90.15806579589844, "logps/oppo_real": -274.20635986328125, "logps/real": -275.6498718261719, "loss": -1.5349, "loss/gen": 0.04587027058005333, "loss/real": -1.985565185546875, "rewards/accuracies": 1.0, "rewards/generated": -658.620361328125, "rewards/margins": 657.1768798828125, "rewards/real": -1.4434819221496582, "step": 131 }, { "epoch": 0.69, "grad_norm": 66.76624114798773, "learning_rate": 4.5878136200716844e-07, "logits/generated": -1.8081977367401123, "logits/oppo_generated": -2.962895393371582, "logits/oppo_real": -2.877319812774658, "logits/real": -2.0002503395080566, "logps/generated": -730.475341796875, "logps/oppo_gen": -100.83236694335938, "logps/oppo_real": -181.53245544433594, "logps/real": -265.831298828125, "loss": -1.5555, "loss/gen": 0.8545611500740051, "loss/real": -1.1570115089416504, "rewards/accuracies": 0.875, "rewards/generated": -629.6429443359375, "rewards/margins": 545.3441162109375, "rewards/real": -84.29884338378906, "step": 132 }, { "epoch": 0.69, "grad_norm": 58.158852709088926, "learning_rate": 4.5833333333333327e-07, "logits/generated": -1.9816755056381226, "logits/oppo_generated": -2.6472039222717285, "logits/oppo_real": -2.741997241973877, "logits/real": -1.9651538133621216, "logps/generated": -828.5115356445312, "logps/oppo_gen": -144.51702880859375, "logps/oppo_real": -452.9317626953125, "logps/real": -480.84735107421875, "loss": -1.7693, "loss/gen": 0.09539352357387543, "loss/real": -1.720844030380249, "rewards/accuracies": 1.0, "rewards/generated": -683.9945068359375, "rewards/margins": 656.0789184570312, "rewards/real": -27.915592193603516, "step": 133 }, { "epoch": 0.7, "grad_norm": 90.54872868033543, "learning_rate": 4.578853046594982e-07, "logits/generated": -2.1033763885498047, "logits/oppo_generated": -2.739530086517334, "logits/oppo_real": -2.9463746547698975, "logits/real": -2.074946880340576, "logps/generated": -545.0913696289062, "logps/oppo_gen": -90.12626647949219, "logps/oppo_real": -418.7986755371094, "logps/real": -383.2454528808594, "loss": -1.6797, "loss/gen": 0.09754064679145813, "loss/real": -2.35553240776062, "rewards/accuracies": 1.0, "rewards/generated": -454.965087890625, "rewards/margins": 490.51837158203125, "rewards/real": 35.55324935913086, "step": 134 }, { "epoch": 0.7, "grad_norm": 83.66236669831007, "learning_rate": 4.574372759856631e-07, "logits/generated": -1.986171007156372, "logits/oppo_generated": -2.7536940574645996, "logits/oppo_real": -3.0076608657836914, "logits/real": -2.0253145694732666, "logps/generated": -483.1329345703125, "logps/oppo_gen": -57.10042190551758, "logps/oppo_real": -238.064697265625, "logps/real": -238.10501098632812, "loss": -1.6896, "loss/gen": 0.1678832322359085, "loss/real": -1.9995965957641602, "rewards/accuracies": 1.0, "rewards/generated": -426.032470703125, "rewards/margins": 425.9921569824219, "rewards/real": -0.04033064842224121, "step": 135 }, { "epoch": 0.71, "grad_norm": 67.85836942324248, "learning_rate": 4.569892473118279e-07, "logits/generated": -1.7882883548736572, "logits/oppo_generated": -2.4256725311279297, "logits/oppo_real": -2.8077471256256104, "logits/real": -1.5553542375564575, "logps/generated": -575.3634643554688, "logps/oppo_gen": -58.635196685791016, "logps/oppo_real": -250.21864318847656, "logps/real": -216.97225952148438, "loss": -1.7029, "loss/gen": 0.1162588894367218, "loss/real": -2.3324639797210693, "rewards/accuracies": 1.0, "rewards/generated": -516.728271484375, "rewards/margins": 549.974609375, "rewards/real": 33.24639892578125, "step": 136 }, { "epoch": 0.72, "grad_norm": 73.62041051060501, "learning_rate": 4.5654121863799285e-07, "logits/generated": -1.89347505569458, "logits/oppo_generated": -2.624286651611328, "logits/oppo_real": -2.8371405601501465, "logits/real": -1.9667630195617676, "logps/generated": -548.8859252929688, "logps/oppo_gen": -106.68203735351562, "logps/oppo_real": -485.33148193359375, "logps/real": -392.25909423828125, "loss": -1.6779, "loss/gen": 0.1170816719532013, "loss/real": -2.9307241439819336, "rewards/accuracies": 1.0, "rewards/generated": -442.203857421875, "rewards/margins": 535.2762451171875, "rewards/real": 93.0723876953125, "step": 137 }, { "epoch": 0.72, "grad_norm": 82.21296242932893, "learning_rate": 4.560931899641577e-07, "logits/generated": -1.917464256286621, "logits/oppo_generated": -2.9165024757385254, "logits/oppo_real": -2.8914356231689453, "logits/real": -2.142932415008545, "logps/generated": -1235.953369140625, "logps/oppo_gen": -83.79830169677734, "logps/oppo_real": -193.01220703125, "logps/real": -226.86917114257812, "loss": -1.7045, "loss/gen": 0.0816095620393753, "loss/real": -1.6614303588867188, "rewards/accuracies": 1.0, "rewards/generated": -1152.155029296875, "rewards/margins": 1118.2979736328125, "rewards/real": -33.856971740722656, "step": 138 }, { "epoch": 0.73, "grad_norm": 54.73798339518798, "learning_rate": 4.5564516129032256e-07, "logits/generated": -2.137822389602661, "logits/oppo_generated": -2.9398818016052246, "logits/oppo_real": -3.185572624206543, "logits/real": -2.284759044647217, "logps/generated": -675.8824462890625, "logps/oppo_gen": -117.77006530761719, "logps/oppo_real": -377.13311767578125, "logps/real": -368.2445373535156, "loss": -1.8385, "loss/gen": 0.03665899857878685, "loss/real": -2.08888578414917, "rewards/accuracies": 1.0, "rewards/generated": -558.1124267578125, "rewards/margins": 567.0009765625, "rewards/real": 8.888594627380371, "step": 139 }, { "epoch": 0.73, "grad_norm": 63.7378626619904, "learning_rate": 4.5519713261648744e-07, "logits/generated": -1.9477636814117432, "logits/oppo_generated": -2.771664619445801, "logits/oppo_real": -2.8211355209350586, "logits/real": -2.0558881759643555, "logps/generated": -591.1102294921875, "logps/oppo_gen": -88.96675109863281, "logps/oppo_real": -326.7928466796875, "logps/real": -351.2818603515625, "loss": -1.7565, "loss/gen": 0.07419906556606293, "loss/real": -1.7551099061965942, "rewards/accuracies": 1.0, "rewards/generated": -502.1435241699219, "rewards/margins": 477.6545104980469, "rewards/real": -24.489017486572266, "step": 140 }, { "epoch": 0.74, "grad_norm": 102.74617921728282, "learning_rate": 4.547491039426523e-07, "logits/generated": -1.9030685424804688, "logits/oppo_generated": -2.7817769050598145, "logits/oppo_real": -2.727473258972168, "logits/real": -2.137336492538452, "logps/generated": -511.839599609375, "logps/oppo_gen": -66.81544494628906, "logps/oppo_real": -323.5364685058594, "logps/real": -293.64990234375, "loss": -1.7376, "loss/gen": 0.10144515335559845, "loss/real": -2.29886531829834, "rewards/accuracies": 1.0, "rewards/generated": -445.0241394042969, "rewards/margins": 474.9106750488281, "rewards/real": 29.886547088623047, "step": 141 }, { "epoch": 0.74, "grad_norm": 72.97101508797003, "learning_rate": 4.5430107526881715e-07, "logits/generated": -2.1376824378967285, "logits/oppo_generated": -2.7992939949035645, "logits/oppo_real": -2.914294719696045, "logits/real": -2.1265478134155273, "logps/generated": -582.9034423828125, "logps/oppo_gen": -68.68360900878906, "logps/oppo_real": -253.99221801757812, "logps/real": -252.176025390625, "loss": -1.7525, "loss/gen": 0.07931329309940338, "loss/real": -2.018162250518799, "rewards/accuracies": 1.0, "rewards/generated": -514.2198486328125, "rewards/margins": 516.0361328125, "rewards/real": 1.8162250518798828, "step": 142 }, { "epoch": 0.75, "grad_norm": 84.03182525835078, "learning_rate": 4.538530465949821e-07, "logits/generated": -1.9512498378753662, "logits/oppo_generated": -2.6728546619415283, "logits/oppo_real": -2.74894380569458, "logits/real": -1.980553388595581, "logps/generated": -495.83770751953125, "logps/oppo_gen": -80.91419982910156, "logps/oppo_real": -178.15316772460938, "logps/real": -180.73532104492188, "loss": -1.6561, "loss/gen": 0.21434536576271057, "loss/real": -1.9741783142089844, "rewards/accuracies": 1.0, "rewards/generated": -414.92352294921875, "rewards/margins": 412.34136962890625, "rewards/real": -2.5821542739868164, "step": 143 }, { "epoch": 0.75, "grad_norm": 122.909519566224, "learning_rate": 4.534050179211469e-07, "logits/generated": -1.907271385192871, "logits/oppo_generated": -2.649775981903076, "logits/oppo_real": -2.8270368576049805, "logits/real": -1.920291543006897, "logps/generated": -488.8565673828125, "logps/oppo_gen": -64.18344116210938, "logps/oppo_real": -235.9340057373047, "logps/real": -287.48065185546875, "loss": -1.602, "loss/gen": 0.21500566601753235, "loss/real": -1.4845335483551025, "rewards/accuracies": 1.0, "rewards/generated": -424.673095703125, "rewards/margins": 373.12646484375, "rewards/real": -51.5466423034668, "step": 144 }, { "epoch": 0.76, "grad_norm": 96.94029553512564, "learning_rate": 4.529569892473118e-07, "logits/generated": -2.026323080062866, "logits/oppo_generated": -2.943478584289551, "logits/oppo_real": -2.859900951385498, "logits/real": -2.257244825363159, "logps/generated": -603.081787109375, "logps/oppo_gen": -85.38736724853516, "logps/oppo_real": -270.22747802734375, "logps/real": -247.45028686523438, "loss": -1.6835, "loss/gen": 0.0911005511879921, "loss/real": -2.2277719974517822, "rewards/accuracies": 1.0, "rewards/generated": -517.6943969726562, "rewards/margins": 540.4715576171875, "rewards/real": 22.777204513549805, "step": 145 }, { "epoch": 0.76, "grad_norm": 65.43579495749059, "learning_rate": 4.5250896057347673e-07, "logits/generated": -2.3204407691955566, "logits/oppo_generated": -2.9215641021728516, "logits/oppo_real": -3.185364246368408, "logits/real": -2.2590651512145996, "logps/generated": -651.804443359375, "logps/oppo_gen": -160.45762634277344, "logps/oppo_real": -471.71771240234375, "logps/real": -498.5760803222656, "loss": -1.7785, "loss/gen": 0.08252400159835815, "loss/real": -1.7314162254333496, "rewards/accuracies": 1.0, "rewards/generated": -491.3468322753906, "rewards/margins": 464.48846435546875, "rewards/real": -26.858369827270508, "step": 146 }, { "epoch": 0.77, "grad_norm": 97.98889346343033, "learning_rate": 4.5206093189964156e-07, "logits/generated": -2.0279428958892822, "logits/oppo_generated": -2.8301095962524414, "logits/oppo_real": -2.9588708877563477, "logits/real": -1.925227403640747, "logps/generated": -647.3221435546875, "logps/oppo_gen": -65.44461059570312, "logps/oppo_real": -184.59007263183594, "logps/real": -213.09719848632812, "loss": -1.7926, "loss/gen": 0.09245370328426361, "loss/real": -1.7149286270141602, "rewards/accuracies": 1.0, "rewards/generated": -581.87744140625, "rewards/margins": 553.3703002929688, "rewards/real": -28.50714111328125, "step": 147 }, { "epoch": 0.77, "grad_norm": 101.17580711470751, "learning_rate": 4.5161290322580644e-07, "logits/generated": -1.8266165256500244, "logits/oppo_generated": -3.001574993133545, "logits/oppo_real": -2.8634276390075684, "logits/real": -2.2667245864868164, "logps/generated": -614.9871215820312, "logps/oppo_gen": -83.24380493164062, "logps/oppo_real": -339.7986755371094, "logps/real": -321.4783935546875, "loss": -1.7798, "loss/gen": 0.04210636392235756, "loss/real": -2.1832029819488525, "rewards/accuracies": 1.0, "rewards/generated": -531.7432861328125, "rewards/margins": 550.0635986328125, "rewards/real": 18.320310592651367, "step": 148 }, { "epoch": 0.78, "grad_norm": 125.15491182180986, "learning_rate": 4.511648745519713e-07, "logits/generated": -1.8712575435638428, "logits/oppo_generated": -2.7472705841064453, "logits/oppo_real": -2.8078293800354004, "logits/real": -1.9940369129180908, "logps/generated": -590.0716552734375, "logps/oppo_gen": -87.50840759277344, "logps/oppo_real": -388.77752685546875, "logps/real": -361.9477233886719, "loss": -1.8277, "loss/gen": 0.14459219574928284, "loss/real": -2.2682981491088867, "rewards/accuracies": 1.0, "rewards/generated": -502.56329345703125, "rewards/margins": 529.39306640625, "rewards/real": 26.829811096191406, "step": 149 }, { "epoch": 0.78, "grad_norm": 130.95518973584055, "learning_rate": 4.507168458781362e-07, "logits/generated": -1.9332165718078613, "logits/oppo_generated": -2.7329964637756348, "logits/oppo_real": -2.917022705078125, "logits/real": -1.8709020614624023, "logps/generated": -633.012939453125, "logps/oppo_gen": -51.14801788330078, "logps/oppo_real": -223.8541717529297, "logps/real": -240.81948852539062, "loss": -1.7284, "loss/gen": 0.03336421027779579, "loss/real": -1.830346703529358, "rewards/accuracies": 1.0, "rewards/generated": -581.8649291992188, "rewards/margins": 564.8995971679688, "rewards/real": -16.965333938598633, "step": 150 }, { "epoch": 0.79, "grad_norm": 72.55144659029033, "learning_rate": 4.5026881720430103e-07, "logits/generated": -1.6202969551086426, "logits/oppo_generated": -2.933967113494873, "logits/oppo_real": -2.779536247253418, "logits/real": -2.1590824127197266, "logps/generated": -601.2861328125, "logps/oppo_gen": -59.4964599609375, "logps/oppo_real": -305.5668029785156, "logps/real": -321.80731201171875, "loss": -1.6793, "loss/gen": 0.09511305391788483, "loss/real": -1.8375946283340454, "rewards/accuracies": 1.0, "rewards/generated": -541.7896728515625, "rewards/margins": 525.5491333007812, "rewards/real": -16.240537643432617, "step": 151 }, { "epoch": 0.79, "grad_norm": 60.15745737424725, "learning_rate": 4.4982078853046596e-07, "logits/generated": -1.8742468357086182, "logits/oppo_generated": -2.7897162437438965, "logits/oppo_real": -2.9050936698913574, "logits/real": -1.9126145839691162, "logps/generated": -610.4820556640625, "logps/oppo_gen": -68.6431884765625, "logps/oppo_real": -279.5192565917969, "logps/real": -256.11859130859375, "loss": -1.8501, "loss/gen": 0.03932388871908188, "loss/real": -2.234006881713867, "rewards/accuracies": 1.0, "rewards/generated": -541.8388671875, "rewards/margins": 565.239501953125, "rewards/real": 23.400684356689453, "step": 152 }, { "epoch": 0.8, "grad_norm": 51.045033636822836, "learning_rate": 4.493727598566308e-07, "logits/generated": -2.009788990020752, "logits/oppo_generated": -2.852003574371338, "logits/oppo_real": -3.0418591499328613, "logits/real": -2.0836453437805176, "logps/generated": -653.099365234375, "logps/oppo_gen": -87.85763549804688, "logps/oppo_real": -353.83575439453125, "logps/real": -326.4531555175781, "loss": -1.8601, "loss/gen": 0.03006863407790661, "loss/real": -2.2738256454467773, "rewards/accuracies": 1.0, "rewards/generated": -565.2417602539062, "rewards/margins": 592.6243896484375, "rewards/real": 27.38258934020996, "step": 153 }, { "epoch": 0.8, "grad_norm": 38.445754639552696, "learning_rate": 4.489247311827957e-07, "logits/generated": -1.6755175590515137, "logits/oppo_generated": -2.8767549991607666, "logits/oppo_real": -2.8858747482299805, "logits/real": -2.09330677986145, "logps/generated": -611.26708984375, "logps/oppo_gen": -81.34989166259766, "logps/oppo_real": -298.17315673828125, "logps/real": -290.9176025390625, "loss": -1.8951, "loss/gen": 0.3788173794746399, "loss/real": -2.0725557804107666, "rewards/accuracies": 0.875, "rewards/generated": -529.917236328125, "rewards/margins": 537.1728515625, "rewards/real": 7.255581855773926, "step": 154 }, { "epoch": 0.81, "grad_norm": 59.257956183906224, "learning_rate": 4.4847670250896056e-07, "logits/generated": -1.944122314453125, "logits/oppo_generated": -2.9232120513916016, "logits/oppo_real": -2.9117484092712402, "logits/real": -1.9138293266296387, "logps/generated": -694.2562255859375, "logps/oppo_gen": -70.24262237548828, "logps/oppo_real": -278.1219482421875, "logps/real": -255.45455932617188, "loss": -1.6892, "loss/gen": 0.13799193501472473, "loss/real": -2.2266738414764404, "rewards/accuracies": 1.0, "rewards/generated": -624.0136108398438, "rewards/margins": 646.6810302734375, "rewards/real": 22.667388916015625, "step": 155 }, { "epoch": 0.81, "grad_norm": 47.609623707095714, "learning_rate": 4.4802867383512544e-07, "logits/generated": -0.9755500555038452, "logits/oppo_generated": -2.88938045501709, "logits/oppo_real": -2.7122931480407715, "logits/real": -1.8035005331039429, "logps/generated": -692.7864379882812, "logps/oppo_gen": -54.18265914916992, "logps/oppo_real": -184.002197265625, "logps/real": -166.90866088867188, "loss": -1.8166, "loss/gen": 0.05148601904511452, "loss/real": -2.1709353923797607, "rewards/accuracies": 1.0, "rewards/generated": -638.603759765625, "rewards/margins": 655.6973266601562, "rewards/real": 17.09354019165039, "step": 156 }, { "epoch": 0.82, "grad_norm": 58.80905235311886, "learning_rate": 4.475806451612903e-07, "logits/generated": -0.8548814654350281, "logits/oppo_generated": -2.9243669509887695, "logits/oppo_real": -2.7173829078674316, "logits/real": -1.9759352207183838, "logps/generated": -570.697021484375, "logps/oppo_gen": -67.00720977783203, "logps/oppo_real": -217.82373046875, "logps/real": -219.30230712890625, "loss": -1.6971, "loss/gen": 0.40054354071617126, "loss/real": -1.9852139949798584, "rewards/accuracies": 1.0, "rewards/generated": -503.6898498535156, "rewards/margins": 502.21124267578125, "rewards/real": -1.4785995483398438, "step": 157 }, { "epoch": 0.83, "grad_norm": 47.13598957985156, "learning_rate": 4.4713261648745515e-07, "logits/generated": -1.7058589458465576, "logits/oppo_generated": -2.831021308898926, "logits/oppo_real": -2.949223756790161, "logits/real": -1.647479772567749, "logps/generated": -702.8096923828125, "logps/oppo_gen": -56.023048400878906, "logps/oppo_real": -286.0043640136719, "logps/real": -275.82373046875, "loss": -1.8097, "loss/gen": 0.04578549787402153, "loss/real": -2.101806640625, "rewards/accuracies": 1.0, "rewards/generated": -646.7866821289062, "rewards/margins": 656.9673461914062, "rewards/real": 10.180654525756836, "step": 158 }, { "epoch": 0.83, "grad_norm": 46.640897193510604, "learning_rate": 4.466845878136201e-07, "logits/generated": -1.0372296571731567, "logits/oppo_generated": -2.9866466522216797, "logits/oppo_real": -2.8740952014923096, "logits/real": -2.0625128746032715, "logps/generated": -717.514892578125, "logps/oppo_gen": -74.18051147460938, "logps/oppo_real": -289.81561279296875, "logps/real": -262.28466796875, "loss": -1.9636, "loss/gen": 0.028024822473526, "loss/real": -2.2753095626831055, "rewards/accuracies": 1.0, "rewards/generated": -643.3343505859375, "rewards/margins": 670.8653564453125, "rewards/real": 27.53096580505371, "step": 159 }, { "epoch": 0.84, "grad_norm": 86.45016402167784, "learning_rate": 4.462365591397849e-07, "logits/generated": -1.1898678541183472, "logits/oppo_generated": -2.7843871116638184, "logits/oppo_real": -2.832613945007324, "logits/real": -1.442640781402588, "logps/generated": -915.6947631835938, "logps/oppo_gen": -64.36344909667969, "logps/oppo_real": -354.620361328125, "logps/real": -332.0544738769531, "loss": -1.9388, "loss/gen": 0.12847158312797546, "loss/real": -2.225658655166626, "rewards/accuracies": 1.0, "rewards/generated": -851.3313598632812, "rewards/margins": 873.897216796875, "rewards/real": 22.56588363647461, "step": 160 }, { "epoch": 0.84, "grad_norm": 60.7973006602935, "learning_rate": 4.457885304659498e-07, "logits/generated": -1.986168622970581, "logits/oppo_generated": -2.730388879776001, "logits/oppo_real": -2.9984025955200195, "logits/real": -1.6806275844573975, "logps/generated": -885.4449462890625, "logps/oppo_gen": -143.77706909179688, "logps/oppo_real": -439.7186279296875, "logps/real": -421.04217529296875, "loss": -1.8178, "loss/gen": 0.028874732553958893, "loss/real": -2.1867642402648926, "rewards/accuracies": 1.0, "rewards/generated": -741.6678466796875, "rewards/margins": 760.34423828125, "rewards/real": 18.676427841186523, "step": 161 }, { "epoch": 0.85, "grad_norm": 84.26484997557552, "learning_rate": 4.4534050179211467e-07, "logits/generated": -1.9005441665649414, "logits/oppo_generated": -2.915806770324707, "logits/oppo_real": -3.1570920944213867, "logits/real": -1.8480937480926514, "logps/generated": -750.3797607421875, "logps/oppo_gen": -118.90010070800781, "logps/oppo_real": -341.41363525390625, "logps/real": -317.5443115234375, "loss": -1.8443, "loss/gen": 0.14120692014694214, "loss/real": -2.2386932373046875, "rewards/accuracies": 1.0, "rewards/generated": -631.4796142578125, "rewards/margins": 655.3489990234375, "rewards/real": 23.86932373046875, "step": 162 }, { "epoch": 0.85, "grad_norm": 143.02717726886732, "learning_rate": 4.4489247311827955e-07, "logits/generated": -1.4650170803070068, "logits/oppo_generated": -2.837372303009033, "logits/oppo_real": -2.9020771980285645, "logits/real": -1.7127195596694946, "logps/generated": -478.16339111328125, "logps/oppo_gen": -67.94302368164062, "logps/oppo_real": -255.73797607421875, "logps/real": -229.78802490234375, "loss": -1.8535, "loss/gen": 0.3787376582622528, "loss/real": -2.2594995498657227, "rewards/accuracies": 1.0, "rewards/generated": -410.22039794921875, "rewards/margins": 436.1703186035156, "rewards/real": 25.949939727783203, "step": 163 }, { "epoch": 0.86, "grad_norm": 62.02056150042601, "learning_rate": 4.444444444444444e-07, "logits/generated": -1.8700604438781738, "logits/oppo_generated": -2.756680727005005, "logits/oppo_real": -3.0085153579711914, "logits/real": -1.58890962600708, "logps/generated": -702.9028930664062, "logps/oppo_gen": -85.88131713867188, "logps/oppo_real": -249.00379943847656, "logps/real": -246.01797485351562, "loss": -1.8894, "loss/gen": 0.031073393300175667, "loss/real": -2.029858350753784, "rewards/accuracies": 1.0, "rewards/generated": -617.0215454101562, "rewards/margins": 620.0074462890625, "rewards/real": 2.9858341217041016, "step": 164 }, { "epoch": 0.86, "grad_norm": 61.25214042816177, "learning_rate": 4.439964157706093e-07, "logits/generated": -0.981428325176239, "logits/oppo_generated": -2.7159523963928223, "logits/oppo_real": -2.960238218307495, "logits/real": -1.0548228025436401, "logps/generated": -598.4140625, "logps/oppo_gen": -58.635005950927734, "logps/oppo_real": -400.1387634277344, "logps/real": -425.5389404296875, "loss": -1.8947, "loss/gen": 0.1158125251531601, "loss/real": -1.7459979057312012, "rewards/accuracies": 1.0, "rewards/generated": -539.779052734375, "rewards/margins": 514.37890625, "rewards/real": -25.400211334228516, "step": 165 }, { "epoch": 0.87, "grad_norm": 81.0536118151514, "learning_rate": 4.4354838709677415e-07, "logits/generated": -1.4463560581207275, "logits/oppo_generated": -2.678307294845581, "logits/oppo_real": -2.8269057273864746, "logits/real": -1.380929708480835, "logps/generated": -846.7905883789062, "logps/oppo_gen": -67.97695922851562, "logps/oppo_real": -219.36227416992188, "logps/real": -325.5843505859375, "loss": -1.93, "loss/gen": 0.5336862206459045, "loss/real": -0.9377790689468384, "rewards/accuracies": 1.0, "rewards/generated": -778.8135986328125, "rewards/margins": 672.591552734375, "rewards/real": -106.22209167480469, "step": 166 }, { "epoch": 0.87, "grad_norm": 66.29716039592118, "learning_rate": 4.4310035842293903e-07, "logits/generated": -2.113171100616455, "logits/oppo_generated": -2.955277442932129, "logits/oppo_real": -3.2049663066864014, "logits/real": -2.1531317234039307, "logps/generated": -731.0936279296875, "logps/oppo_gen": -87.20503234863281, "logps/oppo_real": -395.21441650390625, "logps/real": -363.5802001953125, "loss": -1.8696, "loss/gen": 0.11870712786912918, "loss/real": -2.3163421154022217, "rewards/accuracies": 1.0, "rewards/generated": -643.8885498046875, "rewards/margins": 675.5227661132812, "rewards/real": 31.634204864501953, "step": 167 }, { "epoch": 0.88, "grad_norm": 66.8398673778887, "learning_rate": 4.4265232974910396e-07, "logits/generated": -1.6186769008636475, "logits/oppo_generated": -2.8609347343444824, "logits/oppo_real": -2.865668773651123, "logits/real": -1.6237159967422485, "logps/generated": -770.720703125, "logps/oppo_gen": -97.59341430664062, "logps/oppo_real": -205.549560546875, "logps/real": -184.6165771484375, "loss": -1.8003, "loss/gen": 0.018834060057997704, "loss/real": -2.2093300819396973, "rewards/accuracies": 1.0, "rewards/generated": -673.1272583007812, "rewards/margins": 694.060302734375, "rewards/real": 20.9329833984375, "step": 168 }, { "epoch": 0.88, "grad_norm": 54.69193731997889, "learning_rate": 4.422043010752688e-07, "logits/generated": -2.053788900375366, "logits/oppo_generated": -2.971303939819336, "logits/oppo_real": -3.011564016342163, "logits/real": -2.120933771133423, "logps/generated": -695.5224609375, "logps/oppo_gen": -89.73414611816406, "logps/oppo_real": -350.30523681640625, "logps/real": -328.1854248046875, "loss": -1.9518, "loss/gen": 0.09373271465301514, "loss/real": -2.2211976051330566, "rewards/accuracies": 1.0, "rewards/generated": -605.788330078125, "rewards/margins": 627.9080810546875, "rewards/real": 22.119773864746094, "step": 169 }, { "epoch": 0.89, "grad_norm": 52.319817289914155, "learning_rate": 4.4175627240143367e-07, "logits/generated": -1.4142050743103027, "logits/oppo_generated": -2.840271472930908, "logits/oppo_real": -2.7079410552978516, "logits/real": -2.1080374717712402, "logps/generated": -555.2352905273438, "logps/oppo_gen": -53.288421630859375, "logps/oppo_real": -155.62603759765625, "logps/real": -192.53866577148438, "loss": -1.7625, "loss/gen": 0.1426987648010254, "loss/real": -1.6308739185333252, "rewards/accuracies": 1.0, "rewards/generated": -501.94683837890625, "rewards/margins": 465.03424072265625, "rewards/real": -36.9126091003418, "step": 170 }, { "epoch": 0.89, "grad_norm": 58.41200981497726, "learning_rate": 4.4130824372759855e-07, "logits/generated": -1.7520662546157837, "logits/oppo_generated": -3.0052433013916016, "logits/oppo_real": -2.8851370811462402, "logits/real": -2.3384604454040527, "logps/generated": -719.3140258789062, "logps/oppo_gen": -76.29854583740234, "logps/oppo_real": -393.45806884765625, "logps/real": -362.9105224609375, "loss": -1.877, "loss/gen": 0.030190223827958107, "loss/real": -2.3054752349853516, "rewards/accuracies": 1.0, "rewards/generated": -643.0155029296875, "rewards/margins": 673.56298828125, "rewards/real": 30.547513961791992, "step": 171 }, { "epoch": 0.9, "grad_norm": 48.523557624428676, "learning_rate": 4.4086021505376344e-07, "logits/generated": -1.3822331428527832, "logits/oppo_generated": -2.7603323459625244, "logits/oppo_real": -2.456112861633301, "logits/real": -2.125643253326416, "logps/generated": -608.9871826171875, "logps/oppo_gen": -69.90534210205078, "logps/oppo_real": -236.9437255859375, "logps/real": -226.87928771972656, "loss": -1.9436, "loss/gen": 0.6278254389762878, "loss/real": -2.100644111633301, "rewards/accuracies": 1.0, "rewards/generated": -539.081787109375, "rewards/margins": 549.146240234375, "rewards/real": 10.06441879272461, "step": 172 }, { "epoch": 0.9, "grad_norm": 64.36034041215716, "learning_rate": 4.4041218637992826e-07, "logits/generated": -1.5218685865402222, "logits/oppo_generated": -2.696648597717285, "logits/oppo_real": -2.8764772415161133, "logits/real": -1.5772918462753296, "logps/generated": -611.6922607421875, "logps/oppo_gen": -87.64535522460938, "logps/oppo_real": -305.02203369140625, "logps/real": -342.9971618652344, "loss": -1.7637, "loss/gen": 0.49089139699935913, "loss/real": -1.620248556137085, "rewards/accuracies": 1.0, "rewards/generated": -524.046875, "rewards/margins": 486.0717468261719, "rewards/real": -37.975135803222656, "step": 173 }, { "epoch": 0.91, "grad_norm": 60.28151730111809, "learning_rate": 4.399641577060932e-07, "logits/generated": -1.8260202407836914, "logits/oppo_generated": -2.817328453063965, "logits/oppo_real": -2.961047649383545, "logits/real": -1.7334787845611572, "logps/generated": -653.2313842773438, "logps/oppo_gen": -62.92127990722656, "logps/oppo_real": -255.6164093017578, "logps/real": -285.0434265136719, "loss": -1.8254, "loss/gen": 0.058723676949739456, "loss/real": -1.7057299613952637, "rewards/accuracies": 1.0, "rewards/generated": -590.31005859375, "rewards/margins": 560.883056640625, "rewards/real": -29.427001953125, "step": 174 }, { "epoch": 0.91, "grad_norm": 99.86272804768053, "learning_rate": 4.3951612903225803e-07, "logits/generated": -1.548148512840271, "logits/oppo_generated": -2.848414897918701, "logits/oppo_real": -2.944563388824463, "logits/real": -2.128657102584839, "logps/generated": -713.473876953125, "logps/oppo_gen": -95.9730453491211, "logps/oppo_real": -342.0549621582031, "logps/real": -321.3438720703125, "loss": -1.8176, "loss/gen": 0.12640155851840973, "loss/real": -2.20711088180542, "rewards/accuracies": 1.0, "rewards/generated": -617.5008544921875, "rewards/margins": 638.2119140625, "rewards/real": 20.71107292175293, "step": 175 }, { "epoch": 0.92, "grad_norm": 47.58197849747712, "learning_rate": 4.390681003584229e-07, "logits/generated": -1.8227300643920898, "logits/oppo_generated": -2.591665744781494, "logits/oppo_real": -2.58309268951416, "logits/real": -2.0603384971618652, "logps/generated": -735.041259765625, "logps/oppo_gen": -46.3786735534668, "logps/oppo_real": -55.683685302734375, "logps/real": -42.93335723876953, "loss": -1.9832, "loss/gen": 0.15213513374328613, "loss/real": -2.1275031566619873, "rewards/accuracies": 1.0, "rewards/generated": -688.66259765625, "rewards/margins": 701.4129028320312, "rewards/real": 12.75033187866211, "step": 176 }, { "epoch": 0.92, "grad_norm": 85.49574242414793, "learning_rate": 4.386200716845878e-07, "logits/generated": -2.351722240447998, "logits/oppo_generated": -2.9608449935913086, "logits/oppo_real": -3.0993542671203613, "logits/real": -2.4843802452087402, "logps/generated": -788.0579833984375, "logps/oppo_gen": -151.79364013671875, "logps/oppo_real": -470.64190673828125, "logps/real": -452.0572814941406, "loss": -1.8285, "loss/gen": 0.014275267720222473, "loss/real": -2.1858463287353516, "rewards/accuracies": 1.0, "rewards/generated": -636.2643432617188, "rewards/margins": 654.8489990234375, "rewards/real": 18.584644317626953, "step": 177 }, { "epoch": 0.93, "grad_norm": 79.58229575021696, "learning_rate": 4.3817204301075267e-07, "logits/generated": -2.1911535263061523, "logits/oppo_generated": -2.955319404602051, "logits/oppo_real": -3.0618791580200195, "logits/real": -2.3935108184814453, "logps/generated": -570.6660766601562, "logps/oppo_gen": -117.42491149902344, "logps/oppo_real": -394.2685546875, "logps/real": -364.75994873046875, "loss": -1.9488, "loss/gen": 0.6023984551429749, "loss/real": -2.295085906982422, "rewards/accuracies": 1.0, "rewards/generated": -453.24114990234375, "rewards/margins": 482.749755859375, "rewards/real": 29.50858497619629, "step": 178 }, { "epoch": 0.93, "grad_norm": 70.38357171932063, "learning_rate": 4.377240143369175e-07, "logits/generated": -1.1671700477600098, "logits/oppo_generated": -2.867384433746338, "logits/oppo_real": -2.831949234008789, "logits/real": -1.4764811992645264, "logps/generated": -583.820068359375, "logps/oppo_gen": -63.369956970214844, "logps/oppo_real": -158.0244598388672, "logps/real": -278.0089416503906, "loss": -1.7578, "loss/gen": 0.7094503045082092, "loss/real": -0.8001553416252136, "rewards/accuracies": 0.875, "rewards/generated": -520.4500732421875, "rewards/margins": 400.4656066894531, "rewards/real": -119.98446655273438, "step": 179 }, { "epoch": 0.94, "grad_norm": 74.56631119765144, "learning_rate": 4.3727598566308243e-07, "logits/generated": -1.9085487127304077, "logits/oppo_generated": -2.6366734504699707, "logits/oppo_real": -2.7779293060302734, "logits/real": -2.0540764331817627, "logps/generated": -625.862060546875, "logps/oppo_gen": -67.02287292480469, "logps/oppo_real": -384.6851806640625, "logps/real": -352.558349609375, "loss": -1.7415, "loss/gen": 0.05960576981306076, "loss/real": -2.3212685585021973, "rewards/accuracies": 1.0, "rewards/generated": -558.8392333984375, "rewards/margins": 590.966064453125, "rewards/real": 32.12684631347656, "step": 180 }, { "epoch": 0.95, "grad_norm": 65.722717426606, "learning_rate": 4.368279569892473e-07, "logits/generated": -1.732559323310852, "logits/oppo_generated": -2.634063959121704, "logits/oppo_real": -2.9160523414611816, "logits/real": -1.951407551765442, "logps/generated": -677.4447631835938, "logps/oppo_gen": -60.14728546142578, "logps/oppo_real": -206.13426208496094, "logps/real": -179.4784393310547, "loss": -1.912, "loss/gen": 0.08394724130630493, "loss/real": -2.2665581703186035, "rewards/accuracies": 1.0, "rewards/generated": -617.2974853515625, "rewards/margins": 643.953369140625, "rewards/real": 26.65583038330078, "step": 181 }, { "epoch": 0.95, "grad_norm": 69.63255260876554, "learning_rate": 4.3637992831541214e-07, "logits/generated": -2.192533493041992, "logits/oppo_generated": -2.894730567932129, "logits/oppo_real": -3.0438637733459473, "logits/real": -2.3837125301361084, "logps/generated": -801.6678466796875, "logps/oppo_gen": -99.43392181396484, "logps/oppo_real": -454.5760192871094, "logps/real": -435.48876953125, "loss": -1.9334, "loss/gen": 0.007741004228591919, "loss/real": -2.1908724308013916, "rewards/accuracies": 1.0, "rewards/generated": -702.2339477539062, "rewards/margins": 721.3212280273438, "rewards/real": 19.087242126464844, "step": 182 }, { "epoch": 0.96, "grad_norm": 92.43653861907097, "learning_rate": 4.359318996415771e-07, "logits/generated": -1.6997716426849365, "logits/oppo_generated": -2.791623115539551, "logits/oppo_real": -2.808715581893921, "logits/real": -1.985607624053955, "logps/generated": -561.043212890625, "logps/oppo_gen": -85.10957336425781, "logps/oppo_real": -352.60321044921875, "logps/real": -316.6192626953125, "loss": -1.8481, "loss/gen": 0.3760773539543152, "loss/real": -2.35983943939209, "rewards/accuracies": 1.0, "rewards/generated": -475.93359375, "rewards/margins": 511.9176025390625, "rewards/real": 35.98394775390625, "step": 183 }, { "epoch": 0.96, "grad_norm": 55.39334883993858, "learning_rate": 4.354838709677419e-07, "logits/generated": -2.0199737548828125, "logits/oppo_generated": -2.7498302459716797, "logits/oppo_real": -3.0951387882232666, "logits/real": -2.1367290019989014, "logps/generated": -683.6644287109375, "logps/oppo_gen": -88.00972747802734, "logps/oppo_real": -444.2664794921875, "logps/real": -413.0567626953125, "loss": -1.9361, "loss/gen": 0.04308386147022247, "loss/real": -2.3120970726013184, "rewards/accuracies": 1.0, "rewards/generated": -595.65478515625, "rewards/margins": 626.864501953125, "rewards/real": 31.209705352783203, "step": 184 }, { "epoch": 0.97, "grad_norm": 91.63384592359965, "learning_rate": 4.350358422939068e-07, "logits/generated": -1.7172353267669678, "logits/oppo_generated": -2.704036235809326, "logits/oppo_real": -2.9259088039398193, "logits/real": -1.7819669246673584, "logps/generated": -828.3485717773438, "logps/oppo_gen": -92.61172485351562, "logps/oppo_real": -306.844970703125, "logps/real": -278.81842041015625, "loss": -1.8423, "loss/gen": 0.39729800820350647, "loss/real": -2.2802653312683105, "rewards/accuracies": 1.0, "rewards/generated": -735.73681640625, "rewards/margins": 763.7633056640625, "rewards/real": 28.02651596069336, "step": 185 }, { "epoch": 0.97, "grad_norm": 96.31854803366016, "learning_rate": 4.3458781362007167e-07, "logits/generated": -0.24818015098571777, "logits/oppo_generated": -2.877349853515625, "logits/oppo_real": -2.687556743621826, "logits/real": -1.999771237373352, "logps/generated": -567.3941650390625, "logps/oppo_gen": -45.4771728515625, "logps/oppo_real": -291.0182189941406, "logps/real": -295.58465576171875, "loss": -1.9309, "loss/gen": 0.07412834465503693, "loss/real": -1.9543354511260986, "rewards/accuracies": 1.0, "rewards/generated": -521.9170532226562, "rewards/margins": 517.3505859375, "rewards/real": -4.5664520263671875, "step": 186 }, { "epoch": 0.98, "grad_norm": 55.42259963932809, "learning_rate": 4.3413978494623655e-07, "logits/generated": -1.5127736330032349, "logits/oppo_generated": -2.788801908493042, "logits/oppo_real": -2.9535598754882812, "logits/real": -1.855088233947754, "logps/generated": -925.1292114257812, "logps/oppo_gen": -66.30078125, "logps/oppo_real": -343.6934814453125, "logps/real": -336.6583557128906, "loss": -1.8923, "loss/gen": 0.016879774630069733, "loss/real": -2.0703513622283936, "rewards/accuracies": 1.0, "rewards/generated": -858.828369140625, "rewards/margins": 865.863525390625, "rewards/real": 7.035131454467773, "step": 187 }, { "epoch": 0.98, "grad_norm": 104.95930666066492, "learning_rate": 4.336917562724014e-07, "logits/generated": -1.429623007774353, "logits/oppo_generated": -2.987077474594116, "logits/oppo_real": -2.9259276390075684, "logits/real": -2.400783061981201, "logps/generated": -575.1930541992188, "logps/oppo_gen": -55.12033462524414, "logps/oppo_real": -192.61981201171875, "logps/real": -160.576904296875, "loss": -1.8125, "loss/gen": 0.07752130925655365, "loss/real": -2.3204290866851807, "rewards/accuracies": 1.0, "rewards/generated": -520.07275390625, "rewards/margins": 552.1156005859375, "rewards/real": 32.04291534423828, "step": 188 }, { "epoch": 0.99, "grad_norm": 97.62940130923675, "learning_rate": 4.332437275985663e-07, "logits/generated": -1.4225895404815674, "logits/oppo_generated": -2.717535972595215, "logits/oppo_real": -2.961338520050049, "logits/real": -2.149232864379883, "logps/generated": -739.74853515625, "logps/oppo_gen": -85.34925079345703, "logps/oppo_real": -546.5931396484375, "logps/real": -515.603759765625, "loss": -1.7931, "loss/gen": 0.017313145101070404, "loss/real": -2.309894323348999, "rewards/accuracies": 1.0, "rewards/generated": -654.3992919921875, "rewards/margins": 685.388671875, "rewards/real": 30.98943328857422, "step": 189 }, { "epoch": 0.99, "grad_norm": 69.46462749238816, "learning_rate": 4.3279569892473114e-07, "logits/generated": -1.5553247928619385, "logits/oppo_generated": -2.8351736068725586, "logits/oppo_real": -2.9403247833251953, "logits/real": -1.9800009727478027, "logps/generated": -723.7879028320312, "logps/oppo_gen": -70.80876922607422, "logps/oppo_real": -184.90956115722656, "logps/real": -161.41104125976562, "loss": -1.9214, "loss/gen": 0.08144047111272812, "loss/real": -2.2349853515625, "rewards/accuracies": 1.0, "rewards/generated": -652.9791259765625, "rewards/margins": 676.4776611328125, "rewards/real": 23.498506546020508, "step": 190 }, { "epoch": 1.0, "grad_norm": 53.04196866117551, "learning_rate": 4.32347670250896e-07, "logits/generated": -1.5968117713928223, "logits/oppo_generated": -2.9442050457000732, "logits/oppo_real": -2.914801597595215, "logits/real": -2.385002374649048, "logps/generated": -731.5435791015625, "logps/oppo_gen": -89.47770690917969, "logps/oppo_real": -427.4810791015625, "logps/real": -419.70947265625, "loss": -1.9711, "loss/gen": 0.019157692790031433, "loss/real": -2.0777158737182617, "rewards/accuracies": 1.0, "rewards/generated": -642.06591796875, "rewards/margins": 649.8374633789062, "rewards/real": 7.771598815917969, "step": 191 } ], "logging_steps": 1.0, "max_steps": 1146, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }