zephyr_kliep_ao01s2_e1 / trainer_state.json
seablue's picture
Upload trainer_state.json with huggingface_hub
0639b2f verified
raw
history blame
145 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9973890339425587,
"eval_steps": 100.0,
"global_step": 191,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -3.0979137420654297,
"logits/oppo_generated": -3.0123190879821777,
"logits/oppo_real": -3.0979137420654297,
"logits/real": -3.0123190879821777,
"logps/generated": -90.71572875976562,
"logps/oppo_gen": -90.71572875976562,
"logps/oppo_real": -483.66973876953125,
"logps/real": -483.66973876953125,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.01,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -2.83146333694458,
"logits/oppo_generated": -2.7920122146606445,
"logits/oppo_real": -2.83146333694458,
"logits/real": -2.7920122146606445,
"logps/generated": -62.34805679321289,
"logps/oppo_gen": -62.34805679321289,
"logps/oppo_real": -294.31817626953125,
"logps/real": -294.31817626953125,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 2
},
{
"epoch": 0.02,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -2.919513702392578,
"logits/oppo_generated": -2.9197988510131836,
"logits/oppo_real": -2.919513702392578,
"logits/real": -2.9197988510131836,
"logps/generated": -106.68229675292969,
"logps/oppo_gen": -106.68229675292969,
"logps/oppo_real": -366.2132873535156,
"logps/real": -366.2132873535156,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 3
},
{
"epoch": 0.02,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -3.021902561187744,
"logits/oppo_generated": -2.7693958282470703,
"logits/oppo_real": -3.021902561187744,
"logits/real": -2.7693958282470703,
"logps/generated": -71.52165222167969,
"logps/oppo_gen": -71.52165222167969,
"logps/oppo_real": -280.28497314453125,
"logps/real": -280.28497314453125,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 4
},
{
"epoch": 0.03,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -2.7911243438720703,
"logits/oppo_generated": -2.798036575317383,
"logits/oppo_real": -2.7911243438720703,
"logits/real": -2.798036575317383,
"logps/generated": -43.92365646362305,
"logps/oppo_gen": -43.92365646362305,
"logps/oppo_real": -143.5323944091797,
"logps/real": -143.5323944091797,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 5
},
{
"epoch": 0.03,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -3.104882001876831,
"logits/oppo_generated": -2.8853113651275635,
"logits/oppo_real": -3.104882001876831,
"logits/real": -2.8853113651275635,
"logps/generated": -85.01286315917969,
"logps/oppo_gen": -85.01286315917969,
"logps/oppo_real": -311.1739196777344,
"logps/real": -311.1739196777344,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 6
},
{
"epoch": 0.04,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -3.0711278915405273,
"logits/oppo_generated": -3.0225138664245605,
"logits/oppo_real": -3.0711278915405273,
"logits/real": -3.0225138664245605,
"logps/generated": -104.07185363769531,
"logps/oppo_gen": -104.07185363769531,
"logps/oppo_real": -375.48779296875,
"logps/real": -375.48779296875,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 7
},
{
"epoch": 0.04,
"grad_norm": 135.4093691943988,
"learning_rate": 1.6666666666666667e-08,
"logits/generated": -2.9351305961608887,
"logits/oppo_generated": -2.9404444694519043,
"logits/oppo_real": -2.9351305961608887,
"logits/real": -2.9404444694519043,
"logps/generated": -94.80602264404297,
"logps/oppo_gen": -94.80602264404297,
"logps/oppo_real": -294.525146484375,
"logps/real": -294.525146484375,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 8
},
{
"epoch": 0.05,
"grad_norm": 142.18188175713527,
"learning_rate": 3.3333333333333334e-08,
"logits/generated": -3.0981688499450684,
"logits/oppo_generated": -2.8864831924438477,
"logits/oppo_real": -3.0981688499450684,
"logits/real": -2.8864831924438477,
"logps/generated": -65.3975830078125,
"logps/oppo_gen": -65.3975830078125,
"logps/oppo_real": -312.339111328125,
"logps/real": -312.339111328125,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 9
},
{
"epoch": 0.05,
"grad_norm": 139.81928207734796,
"learning_rate": 5e-08,
"logits/generated": -2.960458278656006,
"logits/oppo_generated": -2.756359577178955,
"logits/oppo_real": -2.960458278656006,
"logits/real": -2.756359577178955,
"logps/generated": -76.06861877441406,
"logps/oppo_gen": -76.06861877441406,
"logps/oppo_real": -265.8212890625,
"logps/real": -265.8212890625,
"loss": 5.3891,
"loss/gen": 7.389056205749512,
"loss/real": -2.0,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 10
},
{
"epoch": 0.06,
"grad_norm": 146.15293253130434,
"learning_rate": 6.666666666666667e-08,
"logits/generated": -2.5165090560913086,
"logits/oppo_generated": -2.6119813919067383,
"logits/oppo_real": -2.516786575317383,
"logits/real": -2.611802101135254,
"logps/generated": -118.57888793945312,
"logps/oppo_gen": -118.53258514404297,
"logps/oppo_real": -290.76776123046875,
"logps/real": -290.7937927246094,
"loss": 5.3858,
"loss/gen": 7.385635852813721,
"loss/real": -1.9997397661209106,
"rewards/accuracies": 0.75,
"rewards/generated": -0.04630112648010254,
"rewards/margins": 0.020273208618164062,
"rewards/real": -0.026027917861938477,
"step": 11
},
{
"epoch": 0.06,
"grad_norm": 135.79117124283155,
"learning_rate": 8.333333333333333e-08,
"logits/generated": -3.1297383308410645,
"logits/oppo_generated": -3.0934252738952637,
"logits/oppo_real": -3.130321502685547,
"logits/real": -3.0924453735351562,
"logps/generated": -96.59043884277344,
"logps/oppo_gen": -96.39014434814453,
"logps/oppo_real": -432.87994384765625,
"logps/real": -432.9808349609375,
"loss": 5.3776,
"loss/gen": 7.374272346496582,
"loss/real": -1.9989911317825317,
"rewards/accuracies": 0.75,
"rewards/generated": -0.20029473304748535,
"rewards/margins": 0.09940791130065918,
"rewards/real": -0.10088682174682617,
"step": 12
},
{
"epoch": 0.07,
"grad_norm": 138.10590224307938,
"learning_rate": 1e-07,
"logits/generated": -2.457440137863159,
"logits/oppo_generated": -2.3860814571380615,
"logits/oppo_real": -2.459930419921875,
"logits/real": -2.383344888687134,
"logps/generated": -67.26084899902344,
"logps/oppo_gen": -66.88719940185547,
"logps/oppo_real": -307.066650390625,
"logps/real": -307.28009033203125,
"loss": 5.3544,
"loss/gen": 7.361501216888428,
"loss/real": -1.9978655576705933,
"rewards/accuracies": 0.875,
"rewards/generated": -0.37365150451660156,
"rewards/margins": 0.16021156311035156,
"rewards/real": -0.21343994140625,
"step": 13
},
{
"epoch": 0.07,
"grad_norm": 134.5610848160604,
"learning_rate": 1.1666666666666667e-07,
"logits/generated": -2.897860050201416,
"logits/oppo_generated": -2.7236456871032715,
"logits/oppo_real": -2.906421661376953,
"logits/real": -2.7175283432006836,
"logps/generated": -55.62891387939453,
"logps/oppo_gen": -54.473785400390625,
"logps/oppo_real": -214.07330322265625,
"logps/real": -214.57391357421875,
"loss": 5.3058,
"loss/gen": 7.3042426109313965,
"loss/real": -1.9949939250946045,
"rewards/accuracies": 0.875,
"rewards/generated": -1.155130386352539,
"rewards/margins": 0.6545138359069824,
"rewards/real": -0.5006165504455566,
"step": 14
},
{
"epoch": 0.08,
"grad_norm": 143.7638580276698,
"learning_rate": 1.3333333333333334e-07,
"logits/generated": -2.9454569816589355,
"logits/oppo_generated": -2.880186080932617,
"logits/oppo_real": -2.954317092895508,
"logits/real": -2.869965076446533,
"logps/generated": -69.53794860839844,
"logps/oppo_gen": -67.36585998535156,
"logps/oppo_real": -272.5278015136719,
"logps/real": -273.41131591796875,
"loss": 5.2534,
"loss/gen": 7.230417251586914,
"loss/real": -1.9911651611328125,
"rewards/accuracies": 1.0,
"rewards/generated": -2.172096014022827,
"rewards/margins": 1.2886085510253906,
"rewards/real": -0.8834874629974365,
"step": 15
},
{
"epoch": 0.08,
"grad_norm": 136.58602486162337,
"learning_rate": 1.5e-07,
"logits/generated": -2.9922404289245605,
"logits/oppo_generated": -2.839021682739258,
"logits/oppo_real": -3.0036399364471436,
"logits/real": -2.8292860984802246,
"logps/generated": -84.50007629394531,
"logps/oppo_gen": -81.83857727050781,
"logps/oppo_real": -431.7451171875,
"logps/real": -432.54998779296875,
"loss": 5.221,
"loss/gen": 7.19518518447876,
"loss/real": -1.9919514656066895,
"rewards/accuracies": 1.0,
"rewards/generated": -2.661508798599243,
"rewards/margins": 1.8566477298736572,
"rewards/real": -0.8048610687255859,
"step": 16
},
{
"epoch": 0.09,
"grad_norm": 129.87295474809218,
"learning_rate": 1.6666666666666665e-07,
"logits/generated": -2.658919334411621,
"logits/oppo_generated": -2.676953077316284,
"logits/oppo_real": -2.681703805923462,
"logits/real": -2.6546268463134766,
"logps/generated": -79.03924560546875,
"logps/oppo_gen": -73.95083618164062,
"logps/oppo_real": -231.33941650390625,
"logps/real": -233.62753295898438,
"loss": 4.9962,
"loss/gen": 7.023112773895264,
"loss/real": -1.9771190881729126,
"rewards/accuracies": 1.0,
"rewards/generated": -5.088409423828125,
"rewards/margins": 2.800309896469116,
"rewards/real": -2.2880992889404297,
"step": 17
},
{
"epoch": 0.09,
"grad_norm": 128.38777930550867,
"learning_rate": 1.833333333333333e-07,
"logits/generated": -3.0433835983276367,
"logits/oppo_generated": -2.802915096282959,
"logits/oppo_real": -3.083519458770752,
"logits/real": -2.778608560562134,
"logps/generated": -60.27662658691406,
"logps/oppo_gen": -53.611244201660156,
"logps/oppo_real": -254.54031372070312,
"logps/real": -258.7552490234375,
"loss": 4.9034,
"loss/gen": 6.913381099700928,
"loss/real": -1.957850694656372,
"rewards/accuracies": 0.875,
"rewards/generated": -6.66538143157959,
"rewards/margins": 2.4504411220550537,
"rewards/real": -4.214940547943115,
"step": 18
},
{
"epoch": 0.1,
"grad_norm": 134.49697225913675,
"learning_rate": 2e-07,
"logits/generated": -3.050615072250366,
"logits/oppo_generated": -2.9067916870117188,
"logits/oppo_real": -3.0934958457946777,
"logits/real": -2.865483283996582,
"logps/generated": -97.78262329101562,
"logps/oppo_gen": -87.72978210449219,
"logps/oppo_real": -446.26251220703125,
"logps/real": -449.0770263671875,
"loss": 4.8312,
"loss/gen": 6.683139324188232,
"loss/real": -1.9718552827835083,
"rewards/accuracies": 1.0,
"rewards/generated": -10.052835464477539,
"rewards/margins": 7.238361358642578,
"rewards/real": -2.814474105834961,
"step": 19
},
{
"epoch": 0.1,
"grad_norm": 132.29558967280428,
"learning_rate": 2.1666666666666667e-07,
"logits/generated": -2.8847241401672363,
"logits/oppo_generated": -3.023085832595825,
"logits/oppo_real": -2.9624710083007812,
"logits/real": -2.9275739192962646,
"logps/generated": -84.17431640625,
"logps/oppo_gen": -66.7940902709961,
"logps/oppo_real": -276.38616943359375,
"logps/real": -282.4690856933594,
"loss": 4.4138,
"loss/gen": 6.213091850280762,
"loss/real": -1.9391708374023438,
"rewards/accuracies": 1.0,
"rewards/generated": -17.380220413208008,
"rewards/margins": 11.297311782836914,
"rewards/real": -6.082908630371094,
"step": 20
},
{
"epoch": 0.11,
"grad_norm": 122.03695169138321,
"learning_rate": 2.3333333333333333e-07,
"logits/generated": -2.785062313079834,
"logits/oppo_generated": -2.763364315032959,
"logits/oppo_real": -2.8705592155456543,
"logits/real": -2.6673243045806885,
"logps/generated": -97.63499450683594,
"logps/oppo_gen": -77.94976043701172,
"logps/oppo_real": -317.0445861816406,
"logps/real": -323.0356750488281,
"loss": 4.265,
"loss/gen": 6.084134578704834,
"loss/real": -1.9400889873504639,
"rewards/accuracies": 1.0,
"rewards/generated": -19.68524169921875,
"rewards/margins": 13.694145202636719,
"rewards/real": -5.991097450256348,
"step": 21
},
{
"epoch": 0.11,
"grad_norm": 122.03695169138321,
"learning_rate": 2.3333333333333333e-07,
"logits/generated": -2.726562261581421,
"logits/oppo_generated": -2.855116844177246,
"logits/oppo_real": -2.865086078643799,
"logits/real": -2.7427496910095215,
"logps/generated": -82.3883285522461,
"logps/oppo_gen": -62.901329040527344,
"logps/oppo_real": -202.70956420898438,
"logps/real": -216.28871154785156,
"loss": 4.0626,
"loss/gen": 6.088003635406494,
"loss/real": -1.8642082214355469,
"rewards/accuracies": 0.875,
"rewards/generated": -19.48699951171875,
"rewards/margins": 5.9078264236450195,
"rewards/real": -13.57917308807373,
"step": 22
},
{
"epoch": 0.12,
"grad_norm": 129.4518717878386,
"learning_rate": 2.5e-07,
"logits/generated": -2.3680832386016846,
"logits/oppo_generated": -2.7028326988220215,
"logits/oppo_real": -2.4785587787628174,
"logits/real": -2.602602958679199,
"logps/generated": -85.23381042480469,
"logps/oppo_gen": -63.708274841308594,
"logps/oppo_real": -408.9969482421875,
"logps/real": -411.189697265625,
"loss": 4.0392,
"loss/gen": 5.969209671020508,
"loss/real": -1.978072166442871,
"rewards/accuracies": 1.0,
"rewards/generated": -21.525535583496094,
"rewards/margins": 19.332752227783203,
"rewards/real": -2.192781925201416,
"step": 23
},
{
"epoch": 0.13,
"grad_norm": 126.05579509313525,
"learning_rate": 2.6666666666666667e-07,
"logits/generated": -2.5577526092529297,
"logits/oppo_generated": -2.848795175552368,
"logits/oppo_real": -2.7064318656921387,
"logits/real": -2.7243504524230957,
"logps/generated": -106.70217895507812,
"logps/oppo_gen": -80.21543884277344,
"logps/oppo_real": -328.6651611328125,
"logps/real": -338.0718078613281,
"loss": 3.8879,
"loss/gen": 5.678422451019287,
"loss/real": -1.9059333801269531,
"rewards/accuracies": 1.0,
"rewards/generated": -26.486736297607422,
"rewards/margins": 17.080078125,
"rewards/real": -9.406658172607422,
"step": 24
},
{
"epoch": 0.13,
"grad_norm": 126.74033031409142,
"learning_rate": 2.833333333333333e-07,
"logits/generated": -2.780156135559082,
"logits/oppo_generated": -2.961332321166992,
"logits/oppo_real": -2.937591075897217,
"logits/real": -2.828620672225952,
"logps/generated": -102.30010986328125,
"logps/oppo_gen": -69.95628356933594,
"logps/oppo_real": -426.2795104980469,
"logps/real": -442.405029296875,
"loss": 3.6222,
"loss/gen": 5.359460830688477,
"loss/real": -1.838744878768921,
"rewards/accuracies": 1.0,
"rewards/generated": -32.34381866455078,
"rewards/margins": 16.218303680419922,
"rewards/real": -16.12551498413086,
"step": 25
},
{
"epoch": 0.14,
"grad_norm": 118.32002749429193,
"learning_rate": 3e-07,
"logits/generated": -2.8048362731933594,
"logits/oppo_generated": -2.8632454872131348,
"logits/oppo_real": -2.9817347526550293,
"logits/real": -2.6736109256744385,
"logps/generated": -134.10989379882812,
"logps/oppo_gen": -92.99905395507812,
"logps/oppo_real": -293.31121826171875,
"logps/real": -314.2544250488281,
"loss": 3.3844,
"loss/gen": 4.9427008628845215,
"loss/real": -1.7905679941177368,
"rewards/accuracies": 1.0,
"rewards/generated": -41.11084747314453,
"rewards/margins": 20.167648315429688,
"rewards/real": -20.943199157714844,
"step": 26
},
{
"epoch": 0.14,
"grad_norm": 116.9737175007168,
"learning_rate": 3.166666666666666e-07,
"logits/generated": -2.8670060634613037,
"logits/oppo_generated": -2.9514551162719727,
"logits/oppo_real": -3.061511993408203,
"logits/real": -2.7576708793640137,
"logps/generated": -200.80691528320312,
"logps/oppo_gen": -153.51296997070312,
"logps/oppo_real": -477.2593994140625,
"logps/real": -495.4168701171875,
"loss": 3.1117,
"loss/gen": 4.679584503173828,
"loss/real": -1.818424940109253,
"rewards/accuracies": 1.0,
"rewards/generated": -47.29395294189453,
"rewards/margins": 29.136451721191406,
"rewards/real": -18.157499313354492,
"step": 27
},
{
"epoch": 0.15,
"grad_norm": 114.94145928471589,
"learning_rate": 3.333333333333333e-07,
"logits/generated": -2.597921848297119,
"logits/oppo_generated": -2.8073906898498535,
"logits/oppo_real": -2.8212432861328125,
"logits/real": -2.581136703491211,
"logps/generated": -125.5290756225586,
"logps/oppo_gen": -73.3681411743164,
"logps/oppo_real": -366.8509826660156,
"logps/real": -396.5523986816406,
"loss": 2.8982,
"loss/gen": 4.413166522979736,
"loss/real": -1.7029860019683838,
"rewards/accuracies": 1.0,
"rewards/generated": -52.16094207763672,
"rewards/margins": 22.459529876708984,
"rewards/real": -29.701412200927734,
"step": 28
},
{
"epoch": 0.15,
"grad_norm": 105.4175848115484,
"learning_rate": 3.5e-07,
"logits/generated": -2.5924081802368164,
"logits/oppo_generated": -2.780890941619873,
"logits/oppo_real": -2.8437681198120117,
"logits/real": -2.549656629562378,
"logps/generated": -120.14186096191406,
"logps/oppo_gen": -64.063720703125,
"logps/oppo_real": -288.26275634765625,
"logps/real": -320.1248779296875,
"loss": 2.627,
"loss/gen": 4.243507385253906,
"loss/real": -1.6813790798187256,
"rewards/accuracies": 0.875,
"rewards/generated": -56.078147888183594,
"rewards/margins": 24.216047286987305,
"rewards/real": -31.862098693847656,
"step": 29
},
{
"epoch": 0.16,
"grad_norm": 103.82670827663998,
"learning_rate": 3.666666666666666e-07,
"logits/generated": -2.577294111251831,
"logits/oppo_generated": -2.7607855796813965,
"logits/oppo_real": -2.8285064697265625,
"logits/real": -2.4904675483703613,
"logps/generated": -202.02459716796875,
"logps/oppo_gen": -138.3541259765625,
"logps/oppo_real": -562.3087158203125,
"logps/real": -597.827392578125,
"loss": 2.3475,
"loss/gen": 3.986393928527832,
"loss/real": -1.6448135375976562,
"rewards/accuracies": 0.75,
"rewards/generated": -63.67047882080078,
"rewards/margins": 28.15182113647461,
"rewards/real": -35.51865768432617,
"step": 30
},
{
"epoch": 0.16,
"grad_norm": 100.94574269750062,
"learning_rate": 3.8333333333333335e-07,
"logits/generated": -2.498058319091797,
"logits/oppo_generated": -2.669119358062744,
"logits/oppo_real": -2.8678927421569824,
"logits/real": -2.371706008911133,
"logps/generated": -145.69668579101562,
"logps/oppo_gen": -70.41365051269531,
"logps/oppo_real": -241.83944702148438,
"logps/real": -286.0458984375,
"loss": 2.0654,
"loss/gen": 3.6289942264556885,
"loss/real": -1.5579355955123901,
"rewards/accuracies": 0.75,
"rewards/generated": -75.28302001953125,
"rewards/margins": 31.076583862304688,
"rewards/real": -44.20643997192383,
"step": 31
},
{
"epoch": 0.17,
"grad_norm": 98.157543369384,
"learning_rate": 4e-07,
"logits/generated": -2.1765198707580566,
"logits/oppo_generated": -2.742915630340576,
"logits/oppo_real": -2.5176854133605957,
"logits/real": -2.3865435123443604,
"logps/generated": -154.5072021484375,
"logps/oppo_gen": -88.06977844238281,
"logps/oppo_real": -250.35305786132812,
"logps/real": -301.65313720703125,
"loss": 1.9822,
"loss/gen": 3.8242578506469727,
"loss/real": -1.486999273300171,
"rewards/accuracies": 0.875,
"rewards/generated": -66.43741607666016,
"rewards/margins": 15.137344360351562,
"rewards/real": -51.300071716308594,
"step": 32
},
{
"epoch": 0.17,
"grad_norm": 91.26540965776684,
"learning_rate": 4.1666666666666667e-07,
"logits/generated": -2.5197973251342773,
"logits/oppo_generated": -2.7696216106414795,
"logits/oppo_real": -2.8814268112182617,
"logits/real": -2.416097640991211,
"logps/generated": -142.49354553222656,
"logps/oppo_gen": -71.01982116699219,
"logps/oppo_real": -175.82728576660156,
"logps/real": -231.18699645996094,
"loss": 1.809,
"loss/gen": 3.6547460556030273,
"loss/real": -1.446402907371521,
"rewards/accuracies": 0.875,
"rewards/generated": -71.47373962402344,
"rewards/margins": 16.114028930664062,
"rewards/real": -55.359710693359375,
"step": 33
},
{
"epoch": 0.18,
"grad_norm": 77.6623991529202,
"learning_rate": 4.3333333333333335e-07,
"logits/generated": -2.51528263092041,
"logits/oppo_generated": -2.669196128845215,
"logits/oppo_real": -3.0089945793151855,
"logits/real": -2.259756565093994,
"logps/generated": -147.68319702148438,
"logps/oppo_gen": -57.30543518066406,
"logps/oppo_real": -326.7079772949219,
"logps/real": -385.50555419921875,
"loss": 1.5094,
"loss/gen": 3.075594902038574,
"loss/real": -1.4120240211486816,
"rewards/accuracies": 0.875,
"rewards/generated": -90.37777709960938,
"rewards/margins": 31.580162048339844,
"rewards/real": -58.797607421875,
"step": 34
},
{
"epoch": 0.18,
"grad_norm": 68.83081542698554,
"learning_rate": 4.5e-07,
"logits/generated": -2.14363431930542,
"logits/oppo_generated": -2.504408359527588,
"logits/oppo_real": -2.6590046882629395,
"logits/real": -1.987848162651062,
"logps/generated": -219.39852905273438,
"logps/oppo_gen": -79.17024230957031,
"logps/oppo_real": -203.21951293945312,
"logps/real": -295.66925048828125,
"loss": 1.3109,
"loss/gen": 2.342175006866455,
"loss/real": -1.075502634048462,
"rewards/accuracies": 0.625,
"rewards/generated": -140.228271484375,
"rewards/margins": 47.77854919433594,
"rewards/real": -92.4497299194336,
"step": 35
},
{
"epoch": 0.19,
"grad_norm": 67.77129364404593,
"learning_rate": 4.6666666666666666e-07,
"logits/generated": -2.2981181144714355,
"logits/oppo_generated": -2.8459527492523193,
"logits/oppo_real": -2.959359645843506,
"logits/real": -2.237192153930664,
"logps/generated": -190.5108642578125,
"logps/oppo_gen": -73.61311340332031,
"logps/oppo_real": -330.1354675292969,
"logps/real": -428.3790588378906,
"loss": 1.2112,
"loss/gen": 2.471275806427002,
"loss/real": -1.017564058303833,
"rewards/accuracies": 0.625,
"rewards/generated": -116.89774322509766,
"rewards/margins": 18.654144287109375,
"rewards/real": -98.24359893798828,
"step": 36
},
{
"epoch": 0.19,
"grad_norm": 55.4256192349542,
"learning_rate": 4.833333333333333e-07,
"logits/generated": -2.098021984100342,
"logits/oppo_generated": -2.913597345352173,
"logits/oppo_real": -2.7888224124908447,
"logits/real": -2.2414162158966064,
"logps/generated": -163.77145385742188,
"logps/oppo_gen": -43.92766189575195,
"logps/oppo_real": -201.2423858642578,
"logps/real": -328.263916015625,
"loss": 1.1323,
"loss/gen": 2.4318315982818604,
"loss/real": -0.729784369468689,
"rewards/accuracies": 0.375,
"rewards/generated": -119.84378814697266,
"rewards/margins": -7.177766799926758,
"rewards/real": -127.02156066894531,
"step": 37
},
{
"epoch": 0.2,
"grad_norm": 43.58372700636816,
"learning_rate": 5e-07,
"logits/generated": -2.149717330932617,
"logits/oppo_generated": -2.8873682022094727,
"logits/oppo_real": -2.953073024749756,
"logits/real": -2.14795184135437,
"logps/generated": -204.06512451171875,
"logps/oppo_gen": -63.10968017578125,
"logps/oppo_real": -290.7437744140625,
"logps/real": -395.0226745605469,
"loss": 0.9722,
"loss/gen": 1.9304319620132446,
"loss/real": -0.9572109580039978,
"rewards/accuracies": 0.625,
"rewards/generated": -140.95542907714844,
"rewards/margins": 36.676517486572266,
"rewards/real": -104.2789077758789,
"step": 38
},
{
"epoch": 0.2,
"grad_norm": 43.28064925124593,
"learning_rate": 4.995519713261649e-07,
"logits/generated": -2.0406126976013184,
"logits/oppo_generated": -2.9155023097991943,
"logits/oppo_real": -2.8444814682006836,
"logits/real": -2.224266767501831,
"logps/generated": -214.7823486328125,
"logps/oppo_gen": -57.36619567871094,
"logps/oppo_real": -319.1702880859375,
"logps/real": -470.078369140625,
"loss": 0.9536,
"loss/gen": 1.6517560482025146,
"loss/real": -0.49091899394989014,
"rewards/accuracies": 0.75,
"rewards/generated": -157.41615295410156,
"rewards/margins": 6.508047103881836,
"rewards/real": -150.90811157226562,
"step": 39
},
{
"epoch": 0.21,
"grad_norm": 39.787796189107254,
"learning_rate": 4.991039426523297e-07,
"logits/generated": -2.045642614364624,
"logits/oppo_generated": -2.817904233932495,
"logits/oppo_real": -2.799046277999878,
"logits/real": -2.124079704284668,
"logps/generated": -240.5778350830078,
"logps/oppo_gen": -62.37173080444336,
"logps/oppo_real": -271.6532287597656,
"logps/real": -416.31292724609375,
"loss": 0.8435,
"loss/gen": 1.3889131546020508,
"loss/real": -0.5534029603004456,
"rewards/accuracies": 0.875,
"rewards/generated": -178.2061004638672,
"rewards/margins": 33.546390533447266,
"rewards/real": -144.65969848632812,
"step": 40
},
{
"epoch": 0.21,
"grad_norm": 39.13177763030722,
"learning_rate": 4.986559139784946e-07,
"logits/generated": -2.117490291595459,
"logits/oppo_generated": -2.833721160888672,
"logits/oppo_real": -2.8886466026306152,
"logits/real": -2.132415533065796,
"logps/generated": -227.88870239257812,
"logps/oppo_gen": -62.6229248046875,
"logps/oppo_real": -254.74127197265625,
"logps/real": -392.49224853515625,
"loss": 0.8003,
"loss/gen": 1.5646700859069824,
"loss/real": -0.6224902868270874,
"rewards/accuracies": 0.875,
"rewards/generated": -165.26576232910156,
"rewards/margins": 27.51479148864746,
"rewards/real": -137.7509765625,
"step": 41
},
{
"epoch": 0.22,
"grad_norm": 42.057857807077056,
"learning_rate": 4.982078853046595e-07,
"logits/generated": -2.0003559589385986,
"logits/oppo_generated": -2.829643726348877,
"logits/oppo_real": -2.9243969917297363,
"logits/real": -2.0654373168945312,
"logps/generated": -399.18463134765625,
"logps/oppo_gen": -93.32018280029297,
"logps/oppo_real": -296.91900634765625,
"logps/real": -458.50836181640625,
"loss": 0.7365,
"loss/gen": 0.6350959539413452,
"loss/real": -0.3841061592102051,
"rewards/accuracies": 0.875,
"rewards/generated": -305.86444091796875,
"rewards/margins": 144.27505493164062,
"rewards/real": -161.5894012451172,
"step": 42
},
{
"epoch": 0.22,
"grad_norm": 45.58806028155152,
"learning_rate": 4.977598566308244e-07,
"logits/generated": -1.7739081382751465,
"logits/oppo_generated": -2.5540552139282227,
"logits/oppo_real": -2.583406448364258,
"logits/real": -1.800355315208435,
"logps/generated": -248.06814575195312,
"logps/oppo_gen": -62.72918701171875,
"logps/oppo_real": -156.4404296875,
"logps/real": -300.55322265625,
"loss": 0.6065,
"loss/gen": 1.3522555828094482,
"loss/real": -0.5588721036911011,
"rewards/accuracies": 0.625,
"rewards/generated": -185.33897399902344,
"rewards/margins": 41.22618103027344,
"rewards/real": -144.11279296875,
"step": 43
},
{
"epoch": 0.23,
"grad_norm": 43.27926721541303,
"learning_rate": 4.973118279569893e-07,
"logits/generated": -1.790377140045166,
"logits/oppo_generated": -2.83392333984375,
"logits/oppo_real": -2.716136932373047,
"logits/real": -1.9894132614135742,
"logps/generated": -220.31033325195312,
"logps/oppo_gen": -47.22636795043945,
"logps/oppo_real": -154.70913696289062,
"logps/real": -310.2235412597656,
"loss": 0.5664,
"loss/gen": 1.3508105278015137,
"loss/real": -0.44485586881637573,
"rewards/accuracies": 0.875,
"rewards/generated": -173.08395385742188,
"rewards/margins": 17.569549560546875,
"rewards/real": -155.514404296875,
"step": 44
},
{
"epoch": 0.23,
"grad_norm": 37.24892019460514,
"learning_rate": 4.96863799283154e-07,
"logits/generated": -2.4816246032714844,
"logits/oppo_generated": -2.894857883453369,
"logits/oppo_real": -3.2226767539978027,
"logits/real": -2.1920199394226074,
"logps/generated": -340.05987548828125,
"logps/oppo_gen": -105.26202392578125,
"logps/oppo_real": -493.02239990234375,
"logps/real": -641.823974609375,
"loss": 0.535,
"loss/gen": 0.9430114030838013,
"loss/real": -0.5119848847389221,
"rewards/accuracies": 0.875,
"rewards/generated": -234.7978515625,
"rewards/margins": 85.996337890625,
"rewards/real": -148.801513671875,
"step": 45
},
{
"epoch": 0.24,
"grad_norm": 34.75340901949854,
"learning_rate": 4.96415770609319e-07,
"logits/generated": -1.8138926029205322,
"logits/oppo_generated": -2.9227819442749023,
"logits/oppo_real": -2.7699246406555176,
"logits/real": -2.158921957015991,
"logps/generated": -272.76275634765625,
"logps/oppo_gen": -65.69087982177734,
"logps/oppo_real": -185.58651733398438,
"logps/real": -307.59954833984375,
"loss": 0.472,
"loss/gen": 1.0596184730529785,
"loss/real": -0.7798694372177124,
"rewards/accuracies": 1.0,
"rewards/generated": -207.07186889648438,
"rewards/margins": 85.05882263183594,
"rewards/real": -122.01304626464844,
"step": 46
},
{
"epoch": 0.25,
"grad_norm": 30.414269200427842,
"learning_rate": 4.959677419354838e-07,
"logits/generated": -2.14742374420166,
"logits/oppo_generated": -2.7580342292785645,
"logits/oppo_real": -2.948944330215454,
"logits/real": -2.00089168548584,
"logps/generated": -289.2415771484375,
"logps/oppo_gen": -83.18161010742188,
"logps/oppo_real": -301.14892578125,
"logps/real": -444.14013671875,
"loss": 0.4491,
"loss/gen": 1.023041009902954,
"loss/real": -0.5700880289077759,
"rewards/accuracies": 0.875,
"rewards/generated": -206.05996704101562,
"rewards/margins": 63.068756103515625,
"rewards/real": -142.9912109375,
"step": 47
},
{
"epoch": 0.25,
"grad_norm": 34.17290379109778,
"learning_rate": 4.955197132616487e-07,
"logits/generated": -2.031747579574585,
"logits/oppo_generated": -2.9268949031829834,
"logits/oppo_real": -2.7888307571411133,
"logits/real": -2.1852447986602783,
"logps/generated": -290.5462341308594,
"logps/oppo_gen": -73.60729217529297,
"logps/oppo_real": -385.06817626953125,
"logps/real": -524.4448852539062,
"loss": 0.3841,
"loss/gen": 0.9104207158088684,
"loss/real": -0.6062330007553101,
"rewards/accuracies": 0.875,
"rewards/generated": -216.93894958496094,
"rewards/margins": 77.56224060058594,
"rewards/real": -139.376708984375,
"step": 48
},
{
"epoch": 0.26,
"grad_norm": 29.687072365341113,
"learning_rate": 4.950716845878136e-07,
"logits/generated": -2.092398166656494,
"logits/oppo_generated": -2.939999580383301,
"logits/oppo_real": -2.972858428955078,
"logits/real": -2.12141752243042,
"logps/generated": -248.61563110351562,
"logps/oppo_gen": -63.58892059326172,
"logps/oppo_real": -292.31512451171875,
"logps/real": -430.34051513671875,
"loss": 0.3154,
"loss/gen": 1.2107611894607544,
"loss/real": -0.6197463274002075,
"rewards/accuracies": 0.75,
"rewards/generated": -185.02670288085938,
"rewards/margins": 47.0013427734375,
"rewards/real": -138.02536010742188,
"step": 49
},
{
"epoch": 0.26,
"grad_norm": 34.51648439966765,
"learning_rate": 4.946236559139784e-07,
"logits/generated": -2.0396437644958496,
"logits/oppo_generated": -2.9997801780700684,
"logits/oppo_real": -3.0686826705932617,
"logits/real": -2.159646511077881,
"logps/generated": -287.21685791015625,
"logps/oppo_gen": -74.3673095703125,
"logps/oppo_real": -280.9033203125,
"logps/real": -418.5960388183594,
"loss": 0.2507,
"loss/gen": 0.9513455629348755,
"loss/real": -0.6230726838111877,
"rewards/accuracies": 0.875,
"rewards/generated": -212.8495330810547,
"rewards/margins": 75.15680694580078,
"rewards/real": -137.69273376464844,
"step": 50
},
{
"epoch": 0.27,
"grad_norm": 34.51648439966765,
"learning_rate": 4.946236559139784e-07,
"logits/generated": -1.9025869369506836,
"logits/oppo_generated": -2.9109854698181152,
"logits/oppo_real": -2.8473780155181885,
"logits/real": -2.126164197921753,
"logps/generated": -262.5989990234375,
"logps/oppo_gen": -68.43426513671875,
"logps/oppo_real": -255.580810546875,
"logps/real": -343.41925048828125,
"loss": 0.2655,
"loss/gen": 1.116389513015747,
"loss/real": -1.1216154098510742,
"rewards/accuracies": 1.0,
"rewards/generated": -194.16476440429688,
"rewards/margins": 106.3262939453125,
"rewards/real": -87.83845520019531,
"step": 51
},
{
"epoch": 0.27,
"grad_norm": 48.07575605554737,
"learning_rate": 4.941756272401433e-07,
"logits/generated": -1.9331986904144287,
"logits/oppo_generated": -2.700697422027588,
"logits/oppo_real": -2.7499947547912598,
"logits/real": -1.9495654106140137,
"logps/generated": -310.9825744628906,
"logps/oppo_gen": -70.83297729492188,
"logps/oppo_real": -279.63055419921875,
"logps/real": -442.52166748046875,
"loss": 0.2175,
"loss/gen": 0.9699513912200928,
"loss/real": -0.37108901143074036,
"rewards/accuracies": 0.75,
"rewards/generated": -240.14959716796875,
"rewards/margins": 77.25849914550781,
"rewards/real": -162.89111328125,
"step": 52
},
{
"epoch": 0.28,
"grad_norm": 42.34599711519009,
"learning_rate": 4.937275985663082e-07,
"logits/generated": -2.342167854309082,
"logits/oppo_generated": -2.8682141304016113,
"logits/oppo_real": -3.2790589332580566,
"logits/real": -2.054896116256714,
"logps/generated": -309.45147705078125,
"logps/oppo_gen": -81.61607360839844,
"logps/oppo_real": -221.72312927246094,
"logps/real": -368.44476318359375,
"loss": 0.1235,
"loss/gen": 0.8315409421920776,
"loss/real": -0.532783567905426,
"rewards/accuracies": 0.75,
"rewards/generated": -227.83538818359375,
"rewards/margins": 81.11372375488281,
"rewards/real": -146.72164916992188,
"step": 53
},
{
"epoch": 0.28,
"grad_norm": 25.769006325656385,
"learning_rate": 4.932795698924731e-07,
"logits/generated": -1.7544469833374023,
"logits/oppo_generated": -2.7301034927368164,
"logits/oppo_real": -2.709321975708008,
"logits/real": -1.932969331741333,
"logps/generated": -319.9211730957031,
"logps/oppo_gen": -80.54927062988281,
"logps/oppo_real": -310.97271728515625,
"logps/real": -400.190673828125,
"loss": 0.1326,
"loss/gen": 0.9462176561355591,
"loss/real": -1.1078202724456787,
"rewards/accuracies": 1.0,
"rewards/generated": -239.37188720703125,
"rewards/margins": 150.15391540527344,
"rewards/real": -89.21797943115234,
"step": 54
},
{
"epoch": 0.29,
"grad_norm": 25.790073064114104,
"learning_rate": 4.92831541218638e-07,
"logits/generated": -1.9824426174163818,
"logits/oppo_generated": -2.766693592071533,
"logits/oppo_real": -2.925718307495117,
"logits/real": -1.925614595413208,
"logps/generated": -310.7677001953125,
"logps/oppo_gen": -71.80207824707031,
"logps/oppo_real": -235.78529357910156,
"logps/real": -371.66021728515625,
"loss": 0.0871,
"loss/gen": 0.9566553831100464,
"loss/real": -0.641250491142273,
"rewards/accuracies": 1.0,
"rewards/generated": -238.96560668945312,
"rewards/margins": 103.09065246582031,
"rewards/real": -135.8749542236328,
"step": 55
},
{
"epoch": 0.29,
"grad_norm": 28.998453033227694,
"learning_rate": 4.923835125448029e-07,
"logits/generated": -1.9766473770141602,
"logits/oppo_generated": -2.7838592529296875,
"logits/oppo_real": -2.928971767425537,
"logits/real": -1.9118558168411255,
"logps/generated": -332.1834411621094,
"logps/oppo_gen": -79.61759185791016,
"logps/oppo_real": -232.3800811767578,
"logps/real": -358.1265869140625,
"loss": 0.073,
"loss/gen": 0.9157878160476685,
"loss/real": -0.7425349950790405,
"rewards/accuracies": 0.875,
"rewards/generated": -252.56585693359375,
"rewards/margins": 126.81934356689453,
"rewards/real": -125.74649810791016,
"step": 56
},
{
"epoch": 0.3,
"grad_norm": 22.703764830970215,
"learning_rate": 4.919354838709677e-07,
"logits/generated": -1.9628534317016602,
"logits/oppo_generated": -2.97432541847229,
"logits/oppo_real": -2.8353166580200195,
"logits/real": -2.0759224891662598,
"logps/generated": -339.04296875,
"logps/oppo_gen": -77.88506317138672,
"logps/oppo_real": -276.94805908203125,
"logps/real": -405.2261047363281,
"loss": 0.0645,
"loss/gen": 0.6549752950668335,
"loss/real": -0.7172196507453918,
"rewards/accuracies": 1.0,
"rewards/generated": -261.15789794921875,
"rewards/margins": 132.8798828125,
"rewards/real": -128.2780303955078,
"step": 57
},
{
"epoch": 0.3,
"grad_norm": 25.084795160459006,
"learning_rate": 4.914874551971326e-07,
"logits/generated": -1.7674864530563354,
"logits/oppo_generated": -2.975834846496582,
"logits/oppo_real": -2.730165719985962,
"logits/real": -2.051088333129883,
"logps/generated": -442.43011474609375,
"logps/oppo_gen": -74.65117645263672,
"logps/oppo_real": -194.0476531982422,
"logps/real": -333.55999755859375,
"loss": -0.0438,
"loss/gen": 0.723225474357605,
"loss/real": -0.6048767566680908,
"rewards/accuracies": 0.875,
"rewards/generated": -367.7789306640625,
"rewards/margins": 228.2666015625,
"rewards/real": -139.5123291015625,
"step": 58
},
{
"epoch": 0.31,
"grad_norm": 26.174395867401362,
"learning_rate": 4.910394265232975e-07,
"logits/generated": -1.5783250331878662,
"logits/oppo_generated": -2.842722177505493,
"logits/oppo_real": -2.5714492797851562,
"logits/real": -1.8060765266418457,
"logps/generated": -329.1837158203125,
"logps/oppo_gen": -54.848045349121094,
"logps/oppo_real": -181.15826416015625,
"logps/real": -288.05316162109375,
"loss": -0.1401,
"loss/gen": 0.6347978115081787,
"loss/real": -0.9310512542724609,
"rewards/accuracies": 1.0,
"rewards/generated": -274.3356628417969,
"rewards/margins": 167.4407958984375,
"rewards/real": -106.89486694335938,
"step": 59
},
{
"epoch": 0.31,
"grad_norm": 23.366499227158794,
"learning_rate": 4.905913978494624e-07,
"logits/generated": -1.848489761352539,
"logits/oppo_generated": -2.70564603805542,
"logits/oppo_real": -2.8469276428222656,
"logits/real": -1.7071902751922607,
"logps/generated": -299.8435974121094,
"logps/oppo_gen": -58.444156646728516,
"logps/oppo_real": -162.31228637695312,
"logps/real": -293.0637512207031,
"loss": -0.1455,
"loss/gen": 0.7676070332527161,
"loss/real": -0.6924855709075928,
"rewards/accuracies": 1.0,
"rewards/generated": -241.39942932128906,
"rewards/margins": 110.64800262451172,
"rewards/real": -130.75144958496094,
"step": 60
},
{
"epoch": 0.32,
"grad_norm": 23.965715716104874,
"learning_rate": 4.901433691756272e-07,
"logits/generated": -2.067742347717285,
"logits/oppo_generated": -2.7352287769317627,
"logits/oppo_real": -3.062769889831543,
"logits/real": -1.7817035913467407,
"logps/generated": -279.8091125488281,
"logps/oppo_gen": -58.14486312866211,
"logps/oppo_real": -235.44610595703125,
"logps/real": -335.95269775390625,
"loss": -0.1697,
"loss/gen": 0.8564858436584473,
"loss/real": -0.9949342012405396,
"rewards/accuracies": 1.0,
"rewards/generated": -221.66424560546875,
"rewards/margins": 121.15766143798828,
"rewards/real": -100.506591796875,
"step": 61
},
{
"epoch": 0.32,
"grad_norm": 22.71430331741021,
"learning_rate": 4.896953405017921e-07,
"logits/generated": -1.850874900817871,
"logits/oppo_generated": -2.81040096282959,
"logits/oppo_real": -2.7932534217834473,
"logits/real": -1.9105334281921387,
"logps/generated": -315.2555236816406,
"logps/oppo_gen": -83.87113952636719,
"logps/oppo_real": -450.6523132324219,
"logps/real": -546.0828857421875,
"loss": -0.1951,
"loss/gen": 0.9174035787582397,
"loss/real": -1.0456944704055786,
"rewards/accuracies": 1.0,
"rewards/generated": -231.38438415527344,
"rewards/margins": 135.95382690429688,
"rewards/real": -95.43055725097656,
"step": 62
},
{
"epoch": 0.33,
"grad_norm": 24.960812793115707,
"learning_rate": 4.89247311827957e-07,
"logits/generated": -1.569549560546875,
"logits/oppo_generated": -2.6111321449279785,
"logits/oppo_real": -2.645878314971924,
"logits/real": -1.4946357011795044,
"logps/generated": -293.79986572265625,
"logps/oppo_gen": -46.035884857177734,
"logps/oppo_real": -112.12235260009766,
"logps/real": -228.71566772460938,
"loss": -0.2873,
"loss/gen": 0.8553179502487183,
"loss/real": -0.8340668082237244,
"rewards/accuracies": 0.75,
"rewards/generated": -247.76397705078125,
"rewards/margins": 131.170654296875,
"rewards/real": -116.59332275390625,
"step": 63
},
{
"epoch": 0.33,
"grad_norm": 28.01833397213722,
"learning_rate": 4.887992831541218e-07,
"logits/generated": -1.4229357242584229,
"logits/oppo_generated": -2.796260356903076,
"logits/oppo_real": -2.7083005905151367,
"logits/real": -1.875757098197937,
"logps/generated": -314.1737365722656,
"logps/oppo_gen": -87.55534362792969,
"logps/oppo_real": -460.838623046875,
"logps/real": -533.9716796875,
"loss": -0.23,
"loss/gen": 0.8128476142883301,
"loss/real": -1.268669843673706,
"rewards/accuracies": 1.0,
"rewards/generated": -226.61837768554688,
"rewards/margins": 153.48536682128906,
"rewards/real": -73.13301849365234,
"step": 64
},
{
"epoch": 0.34,
"grad_norm": 25.329698536099624,
"learning_rate": 4.883512544802867e-07,
"logits/generated": -1.8850572109222412,
"logits/oppo_generated": -2.842528820037842,
"logits/oppo_real": -3.0462865829467773,
"logits/real": -1.8970359563827515,
"logps/generated": -293.83636474609375,
"logps/oppo_gen": -73.348388671875,
"logps/oppo_real": -462.6760559082031,
"logps/real": -521.508056640625,
"loss": -0.2203,
"loss/gen": 0.8699493408203125,
"loss/real": -1.411679983139038,
"rewards/accuracies": 1.0,
"rewards/generated": -220.48794555664062,
"rewards/margins": 161.65594482421875,
"rewards/real": -58.832008361816406,
"step": 65
},
{
"epoch": 0.34,
"grad_norm": 23.556966359497565,
"learning_rate": 4.879032258064516e-07,
"logits/generated": -1.7639917135238647,
"logits/oppo_generated": -2.72807240486145,
"logits/oppo_real": -3.0851736068725586,
"logits/real": -1.7024996280670166,
"logps/generated": -304.4716796875,
"logps/oppo_gen": -62.214202880859375,
"logps/oppo_real": -267.1644592285156,
"logps/real": -361.82183837890625,
"loss": -0.3428,
"loss/gen": 0.7082281112670898,
"loss/real": -1.0534261465072632,
"rewards/accuracies": 1.0,
"rewards/generated": -242.25747680664062,
"rewards/margins": 147.60009765625,
"rewards/real": -94.65738677978516,
"step": 66
},
{
"epoch": 0.35,
"grad_norm": 26.432042922680157,
"learning_rate": 4.874551971326164e-07,
"logits/generated": -1.849442720413208,
"logits/oppo_generated": -2.9244961738586426,
"logits/oppo_real": -2.8555960655212402,
"logits/real": -1.9950306415557861,
"logps/generated": -432.98052978515625,
"logps/oppo_gen": -76.25796508789062,
"logps/oppo_real": -273.7462158203125,
"logps/real": -344.53662109375,
"loss": -0.3943,
"loss/gen": 0.6396173238754272,
"loss/real": -1.2920961380004883,
"rewards/accuracies": 1.0,
"rewards/generated": -356.7225341796875,
"rewards/margins": 285.93218994140625,
"rewards/real": -70.79039001464844,
"step": 67
},
{
"epoch": 0.36,
"grad_norm": 25.96543112065059,
"learning_rate": 4.870071684587813e-07,
"logits/generated": -1.903306484222412,
"logits/oppo_generated": -2.7076897621154785,
"logits/oppo_real": -2.8026769161224365,
"logits/real": -1.8154616355895996,
"logps/generated": -306.98822021484375,
"logps/oppo_gen": -92.40176391601562,
"logps/oppo_real": -466.05743408203125,
"logps/real": -509.5103759765625,
"loss": -0.4503,
"loss/gen": 1.017028570175171,
"loss/real": -1.5654706954956055,
"rewards/accuracies": 1.0,
"rewards/generated": -214.58645629882812,
"rewards/margins": 171.13351440429688,
"rewards/real": -43.45293426513672,
"step": 68
},
{
"epoch": 0.36,
"grad_norm": 24.761144113560263,
"learning_rate": 4.865591397849462e-07,
"logits/generated": -1.9471970796585083,
"logits/oppo_generated": -2.91485595703125,
"logits/oppo_real": -3.02333402633667,
"logits/real": -1.9744383096694946,
"logps/generated": -338.57110595703125,
"logps/oppo_gen": -71.20426940917969,
"logps/oppo_real": -316.012451171875,
"logps/real": -401.06097412109375,
"loss": -0.4181,
"loss/gen": 0.5722190737724304,
"loss/real": -1.1495147943496704,
"rewards/accuracies": 1.0,
"rewards/generated": -267.3668212890625,
"rewards/margins": 182.3182830810547,
"rewards/real": -85.04852294921875,
"step": 69
},
{
"epoch": 0.37,
"grad_norm": 25.560786779768364,
"learning_rate": 4.861111111111111e-07,
"logits/generated": -1.8378194570541382,
"logits/oppo_generated": -2.835737466812134,
"logits/oppo_real": -2.825862407684326,
"logits/real": -1.9024310111999512,
"logps/generated": -395.5787658691406,
"logps/oppo_gen": -66.29288482666016,
"logps/oppo_real": -411.4686279296875,
"logps/real": -450.5022277832031,
"loss": -0.491,
"loss/gen": 0.5810615420341492,
"loss/real": -1.6096638441085815,
"rewards/accuracies": 0.875,
"rewards/generated": -329.285888671875,
"rewards/margins": 290.2522888183594,
"rewards/real": -39.03361511230469,
"step": 70
},
{
"epoch": 0.37,
"grad_norm": 27.276715822734577,
"learning_rate": 4.85663082437276e-07,
"logits/generated": -1.2692300081253052,
"logits/oppo_generated": -2.519876480102539,
"logits/oppo_real": -2.3758904933929443,
"logits/real": -1.504533052444458,
"logps/generated": -309.310791015625,
"logps/oppo_gen": -53.77077865600586,
"logps/oppo_real": -232.32125854492188,
"logps/real": -335.275390625,
"loss": -0.4771,
"loss/gen": 0.6719827651977539,
"loss/real": -0.9704589247703552,
"rewards/accuracies": 1.0,
"rewards/generated": -255.53997802734375,
"rewards/margins": 152.58587646484375,
"rewards/real": -102.95411682128906,
"step": 71
},
{
"epoch": 0.38,
"grad_norm": 31.6782193710368,
"learning_rate": 4.852150537634409e-07,
"logits/generated": -1.967892050743103,
"logits/oppo_generated": -2.847916841506958,
"logits/oppo_real": -3.119495391845703,
"logits/real": -1.8703951835632324,
"logps/generated": -302.1221618652344,
"logps/oppo_gen": -61.632965087890625,
"logps/oppo_real": -283.0968322753906,
"logps/real": -316.48565673828125,
"loss": -0.5487,
"loss/gen": 0.792396605014801,
"loss/real": -1.6661118268966675,
"rewards/accuracies": 1.0,
"rewards/generated": -240.48919677734375,
"rewards/margins": 207.1003875732422,
"rewards/real": -33.388816833496094,
"step": 72
},
{
"epoch": 0.38,
"grad_norm": 28.101801413357496,
"learning_rate": 4.847670250896057e-07,
"logits/generated": -1.7287254333496094,
"logits/oppo_generated": -2.817739963531494,
"logits/oppo_real": -2.8102121353149414,
"logits/real": -1.9246106147766113,
"logps/generated": -348.30560302734375,
"logps/oppo_gen": -84.71308135986328,
"logps/oppo_real": -441.73095703125,
"logps/real": -496.3192138671875,
"loss": -0.5813,
"loss/gen": 0.767849326133728,
"loss/real": -1.454117774963379,
"rewards/accuracies": 1.0,
"rewards/generated": -263.5924987792969,
"rewards/margins": 209.0042724609375,
"rewards/real": -54.588233947753906,
"step": 73
},
{
"epoch": 0.39,
"grad_norm": 38.45779443916408,
"learning_rate": 4.843189964157705e-07,
"logits/generated": -1.91994047164917,
"logits/oppo_generated": -2.863375186920166,
"logits/oppo_real": -2.9448790550231934,
"logits/real": -1.9825626611709595,
"logps/generated": -329.95269775390625,
"logps/oppo_gen": -68.82878112792969,
"logps/oppo_real": -370.04193115234375,
"logps/real": -432.669189453125,
"loss": -0.5133,
"loss/gen": 0.658030092716217,
"loss/real": -1.373727798461914,
"rewards/accuracies": 1.0,
"rewards/generated": -261.1239013671875,
"rewards/margins": 198.49668884277344,
"rewards/real": -62.62722396850586,
"step": 74
},
{
"epoch": 0.39,
"grad_norm": 30.626852423248927,
"learning_rate": 4.838709677419355e-07,
"logits/generated": -1.6279140710830688,
"logits/oppo_generated": -2.6728456020355225,
"logits/oppo_real": -2.717794418334961,
"logits/real": -1.7041985988616943,
"logps/generated": -413.343017578125,
"logps/oppo_gen": -96.53443908691406,
"logps/oppo_real": -349.5957336425781,
"logps/real": -410.91473388671875,
"loss": -0.6334,
"loss/gen": 0.40114909410476685,
"loss/real": -1.3868098258972168,
"rewards/accuracies": 1.0,
"rewards/generated": -316.80859375,
"rewards/margins": 255.48956298828125,
"rewards/real": -61.31901550292969,
"step": 75
},
{
"epoch": 0.4,
"grad_norm": 28.100519945214646,
"learning_rate": 4.834229390681004e-07,
"logits/generated": -1.8667380809783936,
"logits/oppo_generated": -2.7693190574645996,
"logits/oppo_real": -2.77004337310791,
"logits/real": -1.9453085660934448,
"logps/generated": -386.6142578125,
"logps/oppo_gen": -76.46708679199219,
"logps/oppo_real": -404.62432861328125,
"logps/real": -449.008056640625,
"loss": -0.7145,
"loss/gen": 0.4149353802204132,
"loss/real": -1.5561623573303223,
"rewards/accuracies": 1.0,
"rewards/generated": -310.1471862792969,
"rewards/margins": 265.763427734375,
"rewards/real": -44.383766174316406,
"step": 76
},
{
"epoch": 0.4,
"grad_norm": 28.100519945214646,
"learning_rate": 4.834229390681004e-07,
"logits/generated": -1.9856168031692505,
"logits/oppo_generated": -2.82077693939209,
"logits/oppo_real": -2.9570560455322266,
"logits/real": -1.8599485158920288,
"logps/generated": -351.98699951171875,
"logps/oppo_gen": -75.9544906616211,
"logps/oppo_real": -272.2792663574219,
"logps/real": -342.5171813964844,
"loss": -0.7074,
"loss/gen": 0.5127630233764648,
"loss/real": -1.2976210117340088,
"rewards/accuracies": 1.0,
"rewards/generated": -276.03253173828125,
"rewards/margins": 205.79464721679688,
"rewards/real": -70.23788452148438,
"step": 77
},
{
"epoch": 0.41,
"grad_norm": 28.89926119061318,
"learning_rate": 4.829749103942652e-07,
"logits/generated": -1.4996392726898193,
"logits/oppo_generated": -2.77388858795166,
"logits/oppo_real": -2.6705479621887207,
"logits/real": -1.6910290718078613,
"logps/generated": -306.9933166503906,
"logps/oppo_gen": -44.69869613647461,
"logps/oppo_real": -146.0938720703125,
"logps/real": -247.02572631835938,
"loss": -0.7324,
"loss/gen": 0.6118674278259277,
"loss/real": -0.990681529045105,
"rewards/accuracies": 1.0,
"rewards/generated": -262.29461669921875,
"rewards/margins": 161.36277770996094,
"rewards/real": -100.93185424804688,
"step": 78
},
{
"epoch": 0.41,
"grad_norm": 31.06954538435709,
"learning_rate": 4.825268817204301e-07,
"logits/generated": -1.8742992877960205,
"logits/oppo_generated": -2.823974847793579,
"logits/oppo_real": -2.8963050842285156,
"logits/real": -1.9178167581558228,
"logps/generated": -351.6239929199219,
"logps/oppo_gen": -77.19644165039062,
"logps/oppo_real": -326.88067626953125,
"logps/real": -378.48602294921875,
"loss": -0.698,
"loss/gen": 0.5047196745872498,
"loss/real": -1.4839468002319336,
"rewards/accuracies": 1.0,
"rewards/generated": -274.42755126953125,
"rewards/margins": 222.82223510742188,
"rewards/real": -51.60532760620117,
"step": 79
},
{
"epoch": 0.42,
"grad_norm": 43.30920802785477,
"learning_rate": 4.820788530465949e-07,
"logits/generated": -1.7955554723739624,
"logits/oppo_generated": -2.8842811584472656,
"logits/oppo_real": -2.962029457092285,
"logits/real": -1.9502441883087158,
"logps/generated": -325.9687194824219,
"logps/oppo_gen": -54.408782958984375,
"logps/oppo_real": -296.562255859375,
"logps/real": -371.3411865234375,
"loss": -0.6694,
"loss/gen": 0.6074373126029968,
"loss/real": -1.2522108554840088,
"rewards/accuracies": 1.0,
"rewards/generated": -271.5599365234375,
"rewards/margins": 196.781005859375,
"rewards/real": -74.77892303466797,
"step": 80
},
{
"epoch": 0.42,
"grad_norm": 67.32772068514016,
"learning_rate": 4.816308243727598e-07,
"logits/generated": -1.738360047340393,
"logits/oppo_generated": -2.689702272415161,
"logits/oppo_real": -2.605893611907959,
"logits/real": -1.9019668102264404,
"logps/generated": -328.74029541015625,
"logps/oppo_gen": -72.64117431640625,
"logps/oppo_real": -543.97119140625,
"logps/real": -576.9856567382812,
"loss": -0.7155,
"loss/gen": 0.8135882616043091,
"loss/real": -1.6698557138442993,
"rewards/accuracies": 0.875,
"rewards/generated": -256.09912109375,
"rewards/margins": 223.08465576171875,
"rewards/real": -33.014434814453125,
"step": 81
},
{
"epoch": 0.43,
"grad_norm": 45.615423331403214,
"learning_rate": 4.811827956989247e-07,
"logits/generated": -1.8823816776275635,
"logits/oppo_generated": -2.6806015968322754,
"logits/oppo_real": -2.851822853088379,
"logits/real": -1.7596267461776733,
"logps/generated": -390.17926025390625,
"logps/oppo_gen": -60.20751953125,
"logps/oppo_real": -257.2502136230469,
"logps/real": -304.0963439941406,
"loss": -0.8542,
"loss/gen": 0.6062071919441223,
"loss/real": -1.531538486480713,
"rewards/accuracies": 1.0,
"rewards/generated": -329.97174072265625,
"rewards/margins": 283.1255798339844,
"rewards/real": -46.84613800048828,
"step": 82
},
{
"epoch": 0.43,
"grad_norm": 82.74539439128168,
"learning_rate": 4.807347670250896e-07,
"logits/generated": -2.0427887439727783,
"logits/oppo_generated": -2.855865001678467,
"logits/oppo_real": -3.10068416595459,
"logits/real": -1.9768484830856323,
"logps/generated": -352.57281494140625,
"logps/oppo_gen": -83.05951690673828,
"logps/oppo_real": -407.9609375,
"logps/real": -482.402587890625,
"loss": -0.7341,
"loss/gen": 0.5735456347465515,
"loss/real": -1.2555840015411377,
"rewards/accuracies": 0.875,
"rewards/generated": -269.5133056640625,
"rewards/margins": 195.07171630859375,
"rewards/real": -74.44159698486328,
"step": 83
},
{
"epoch": 0.44,
"grad_norm": 59.27270587807996,
"learning_rate": 4.802867383512544e-07,
"logits/generated": -2.0733423233032227,
"logits/oppo_generated": -2.820967197418213,
"logits/oppo_real": -2.9550280570983887,
"logits/real": -1.9983410835266113,
"logps/generated": -397.8853454589844,
"logps/oppo_gen": -113.86212921142578,
"logps/oppo_real": -391.526123046875,
"logps/real": -436.72381591796875,
"loss": -0.9052,
"loss/gen": 0.5662827491760254,
"loss/real": -1.548022985458374,
"rewards/accuracies": 1.0,
"rewards/generated": -284.0232238769531,
"rewards/margins": 238.82553100585938,
"rewards/real": -45.19770050048828,
"step": 84
},
{
"epoch": 0.44,
"grad_norm": 48.762087844323275,
"learning_rate": 4.798387096774193e-07,
"logits/generated": -1.8719103336334229,
"logits/oppo_generated": -2.8029284477233887,
"logits/oppo_real": -2.9302010536193848,
"logits/real": -1.9117934703826904,
"logps/generated": -291.1409912109375,
"logps/oppo_gen": -59.32632827758789,
"logps/oppo_real": -256.26556396484375,
"logps/real": -281.3046875,
"loss": -0.7887,
"loss/gen": 0.9425208568572998,
"loss/real": -1.7496089935302734,
"rewards/accuracies": 1.0,
"rewards/generated": -231.814697265625,
"rewards/margins": 206.77557373046875,
"rewards/real": -25.03911590576172,
"step": 85
},
{
"epoch": 0.45,
"grad_norm": 54.32774527117797,
"learning_rate": 4.793906810035842e-07,
"logits/generated": -1.8661472797393799,
"logits/oppo_generated": -2.915862560272217,
"logits/oppo_real": -2.777987480163574,
"logits/real": -2.0660204887390137,
"logps/generated": -330.3395080566406,
"logps/oppo_gen": -69.80546569824219,
"logps/oppo_real": -230.58383178710938,
"logps/real": -266.6177673339844,
"loss": -0.93,
"loss/gen": 0.6018867492675781,
"loss/real": -1.6396608352661133,
"rewards/accuracies": 1.0,
"rewards/generated": -260.5340576171875,
"rewards/margins": 224.50010681152344,
"rewards/real": -36.03392791748047,
"step": 86
},
{
"epoch": 0.45,
"grad_norm": 56.34434795855748,
"learning_rate": 4.789426523297491e-07,
"logits/generated": -1.8885971307754517,
"logits/oppo_generated": -3.002845048904419,
"logits/oppo_real": -2.9699549674987793,
"logits/real": -2.183290719985962,
"logps/generated": -343.339599609375,
"logps/oppo_gen": -82.54539489746094,
"logps/oppo_real": -261.07891845703125,
"logps/real": -305.88140869140625,
"loss": -0.9169,
"loss/gen": 0.6482059955596924,
"loss/real": -1.5519750118255615,
"rewards/accuracies": 1.0,
"rewards/generated": -260.794189453125,
"rewards/margins": 215.99168395996094,
"rewards/real": -44.80250930786133,
"step": 87
},
{
"epoch": 0.46,
"grad_norm": 51.83929494119302,
"learning_rate": 4.78494623655914e-07,
"logits/generated": -1.9645639657974243,
"logits/oppo_generated": -2.8191170692443848,
"logits/oppo_real": -2.8346924781799316,
"logits/real": -2.02170991897583,
"logps/generated": -359.3294982910156,
"logps/oppo_gen": -77.02418518066406,
"logps/oppo_real": -365.96343994140625,
"logps/real": -455.93804931640625,
"loss": -0.7995,
"loss/gen": 0.5177488327026367,
"loss/real": -1.1002535820007324,
"rewards/accuracies": 1.0,
"rewards/generated": -282.3053283691406,
"rewards/margins": 192.3306884765625,
"rewards/real": -89.97463989257812,
"step": 88
},
{
"epoch": 0.46,
"grad_norm": 68.54197932426156,
"learning_rate": 4.780465949820789e-07,
"logits/generated": -2.152953624725342,
"logits/oppo_generated": -2.9769649505615234,
"logits/oppo_real": -3.0434319972991943,
"logits/real": -2.1738698482513428,
"logps/generated": -345.0735778808594,
"logps/oppo_gen": -78.12904357910156,
"logps/oppo_real": -379.5708312988281,
"logps/real": -461.0662536621094,
"loss": -0.92,
"loss/gen": 0.7530688047409058,
"loss/real": -1.1850459575653076,
"rewards/accuracies": 1.0,
"rewards/generated": -266.94451904296875,
"rewards/margins": 185.4491424560547,
"rewards/real": -81.49540710449219,
"step": 89
},
{
"epoch": 0.47,
"grad_norm": 50.14855129777554,
"learning_rate": 4.775985663082437e-07,
"logits/generated": -1.7497427463531494,
"logits/oppo_generated": -2.8537919521331787,
"logits/oppo_real": -2.717353105545044,
"logits/real": -2.008902072906494,
"logps/generated": -428.0735778808594,
"logps/oppo_gen": -119.7020492553711,
"logps/oppo_real": -358.0323791503906,
"logps/real": -440.1455383300781,
"loss": -1.0468,
"loss/gen": 0.5207056999206543,
"loss/real": -1.178868293762207,
"rewards/accuracies": 1.0,
"rewards/generated": -308.3714904785156,
"rewards/margins": 226.25833129882812,
"rewards/real": -82.11316680908203,
"step": 90
},
{
"epoch": 0.48,
"grad_norm": 83.74393343233139,
"learning_rate": 4.771505376344086e-07,
"logits/generated": -1.6041526794433594,
"logits/oppo_generated": -2.863269805908203,
"logits/oppo_real": -2.6908156871795654,
"logits/real": -1.9434775114059448,
"logps/generated": -464.01544189453125,
"logps/oppo_gen": -73.95469665527344,
"logps/oppo_real": -308.79437255859375,
"logps/real": -393.7951354980469,
"loss": -0.8861,
"loss/gen": 0.208269402384758,
"loss/real": -1.1499923467636108,
"rewards/accuracies": 1.0,
"rewards/generated": -390.06072998046875,
"rewards/margins": 305.0599365234375,
"rewards/real": -85.00077819824219,
"step": 91
},
{
"epoch": 0.48,
"grad_norm": 51.583056981707905,
"learning_rate": 4.7670250896057344e-07,
"logits/generated": -1.8397870063781738,
"logits/oppo_generated": -2.8908724784851074,
"logits/oppo_real": -2.8895483016967773,
"logits/real": -1.9887313842773438,
"logps/generated": -355.3204650878906,
"logps/oppo_gen": -76.94686126708984,
"logps/oppo_real": -271.02813720703125,
"logps/real": -311.24884033203125,
"loss": -1.0606,
"loss/gen": 0.5185320377349854,
"loss/real": -1.5977928638458252,
"rewards/accuracies": 1.0,
"rewards/generated": -278.37359619140625,
"rewards/margins": 238.15289306640625,
"rewards/real": -40.22071075439453,
"step": 92
},
{
"epoch": 0.49,
"grad_norm": 84.3485346225336,
"learning_rate": 4.762544802867383e-07,
"logits/generated": -2.00917387008667,
"logits/oppo_generated": -2.873152732849121,
"logits/oppo_real": -2.9451375007629395,
"logits/real": -2.055788993835449,
"logps/generated": -423.2607421875,
"logps/oppo_gen": -63.39752960205078,
"logps/oppo_real": -189.16378784179688,
"logps/real": -238.4603271484375,
"loss": -0.9892,
"loss/gen": 0.42659705877304077,
"loss/real": -1.5070346593856812,
"rewards/accuracies": 1.0,
"rewards/generated": -359.86322021484375,
"rewards/margins": 310.566650390625,
"rewards/real": -49.296546936035156,
"step": 93
},
{
"epoch": 0.49,
"grad_norm": 56.707858802814975,
"learning_rate": 4.758064516129032e-07,
"logits/generated": -1.7914602756500244,
"logits/oppo_generated": -2.698634147644043,
"logits/oppo_real": -2.8618617057800293,
"logits/real": -1.7156788110733032,
"logps/generated": -337.7628479003906,
"logps/oppo_gen": -66.55247497558594,
"logps/oppo_real": -330.7273254394531,
"logps/real": -382.69952392578125,
"loss": -1.1736,
"loss/gen": 0.5937217473983765,
"loss/real": -1.4802782535552979,
"rewards/accuracies": 0.875,
"rewards/generated": -271.21038818359375,
"rewards/margins": 219.2382049560547,
"rewards/real": -51.9721794128418,
"step": 94
},
{
"epoch": 0.5,
"grad_norm": 63.84164674704906,
"learning_rate": 4.753584229390681e-07,
"logits/generated": -1.484118103981018,
"logits/oppo_generated": -2.6399593353271484,
"logits/oppo_real": -2.7412514686584473,
"logits/real": -1.6595765352249146,
"logps/generated": -318.4638366699219,
"logps/oppo_gen": -71.26600646972656,
"logps/oppo_real": -342.77703857421875,
"logps/real": -332.076416015625,
"loss": -1.0251,
"loss/gen": 1.2006943225860596,
"loss/real": -2.107006311416626,
"rewards/accuracies": 1.0,
"rewards/generated": -247.19781494140625,
"rewards/margins": 257.8984680175781,
"rewards/real": 10.700631141662598,
"step": 95
},
{
"epoch": 0.5,
"grad_norm": 75.05080961213876,
"learning_rate": 4.749103942652329e-07,
"logits/generated": -1.6428945064544678,
"logits/oppo_generated": -2.657951831817627,
"logits/oppo_real": -2.851677656173706,
"logits/real": -1.653546929359436,
"logps/generated": -400.077880859375,
"logps/oppo_gen": -85.37565612792969,
"logps/oppo_real": -318.65338134765625,
"logps/real": -342.5865173339844,
"loss": -1.161,
"loss/gen": 0.37989306449890137,
"loss/real": -1.7606685161590576,
"rewards/accuracies": 1.0,
"rewards/generated": -314.70220947265625,
"rewards/margins": 290.7690734863281,
"rewards/real": -23.933155059814453,
"step": 96
},
{
"epoch": 0.51,
"grad_norm": 46.01563826400488,
"learning_rate": 4.7446236559139785e-07,
"logits/generated": -1.8539488315582275,
"logits/oppo_generated": -2.7544326782226562,
"logits/oppo_real": -2.9937453269958496,
"logits/real": -1.6929676532745361,
"logps/generated": -571.8555908203125,
"logps/oppo_gen": -91.8690185546875,
"logps/oppo_real": -138.0150604248047,
"logps/real": -236.94998168945312,
"loss": -1.303,
"loss/gen": 0.19529122114181519,
"loss/real": -1.010650634765625,
"rewards/accuracies": 1.0,
"rewards/generated": -479.98651123046875,
"rewards/margins": 381.0516357421875,
"rewards/real": -98.93492889404297,
"step": 97
},
{
"epoch": 0.51,
"grad_norm": 108.21823112061571,
"learning_rate": 4.740143369175627e-07,
"logits/generated": -1.5786761045455933,
"logits/oppo_generated": -2.937568187713623,
"logits/oppo_real": -2.910910129547119,
"logits/real": -2.0116238594055176,
"logps/generated": -319.71612548828125,
"logps/oppo_gen": -72.81363677978516,
"logps/oppo_real": -349.1295166015625,
"logps/real": -343.4236145019531,
"loss": -1.2469,
"loss/gen": 1.134260654449463,
"loss/real": -2.0570592880249023,
"rewards/accuracies": 1.0,
"rewards/generated": -246.9025115966797,
"rewards/margins": 252.60841369628906,
"rewards/real": 5.705905914306641,
"step": 98
},
{
"epoch": 0.52,
"grad_norm": 61.41563819772921,
"learning_rate": 4.7356630824372756e-07,
"logits/generated": -1.6027933359146118,
"logits/oppo_generated": -2.8456101417541504,
"logits/oppo_real": -2.7031779289245605,
"logits/real": -1.860973596572876,
"logps/generated": -398.1639404296875,
"logps/oppo_gen": -74.38111114501953,
"logps/oppo_real": -210.15036010742188,
"logps/real": -244.78244018554688,
"loss": -1.2393,
"loss/gen": 0.38152068853378296,
"loss/real": -1.653679370880127,
"rewards/accuracies": 1.0,
"rewards/generated": -323.7828369140625,
"rewards/margins": 289.1507568359375,
"rewards/real": -34.63207244873047,
"step": 99
},
{
"epoch": 0.52,
"grad_norm": 155.75063954523588,
"learning_rate": 4.731182795698925e-07,
"logits/generated": -1.7713713645935059,
"logits/oppo_generated": -2.8624844551086426,
"logits/oppo_real": -3.1369876861572266,
"logits/real": -1.7838329076766968,
"logps/generated": -434.6895446777344,
"logps/oppo_gen": -89.36515808105469,
"logps/oppo_real": -372.01629638671875,
"logps/real": -403.449462890625,
"loss": -1.0078,
"loss/gen": 0.25918668508529663,
"loss/real": -1.6856684684753418,
"rewards/accuracies": 1.0,
"rewards/generated": -345.32440185546875,
"rewards/margins": 313.8912353515625,
"rewards/real": -31.433147430419922,
"step": 100
},
{
"epoch": 0.53,
"grad_norm": 110.75212945892959,
"learning_rate": 4.726702508960573e-07,
"logits/generated": -1.7238816022872925,
"logits/oppo_generated": -2.851134777069092,
"logits/oppo_real": -2.806102752685547,
"logits/real": -1.8398982286453247,
"logps/generated": -455.3783874511719,
"logps/oppo_gen": -102.20521545410156,
"logps/oppo_real": -303.71771240234375,
"logps/real": -384.83721923828125,
"loss": -1.2778,
"loss/gen": 0.28883445262908936,
"loss/real": -1.1888045072555542,
"rewards/accuracies": 1.0,
"rewards/generated": -353.17315673828125,
"rewards/margins": 272.05364990234375,
"rewards/real": -81.11953735351562,
"step": 101
},
{
"epoch": 0.53,
"grad_norm": 208.35696532633605,
"learning_rate": 4.722222222222222e-07,
"logits/generated": -1.8423570394515991,
"logits/oppo_generated": -2.7869691848754883,
"logits/oppo_real": -2.934145212173462,
"logits/real": -1.8845537900924683,
"logps/generated": -446.48486328125,
"logps/oppo_gen": -118.00005340576172,
"logps/oppo_real": -354.1058654785156,
"logps/real": -363.6239013671875,
"loss": -0.9829,
"loss/gen": 0.3972959518432617,
"loss/real": -1.904819130897522,
"rewards/accuracies": 1.0,
"rewards/generated": -328.4848327636719,
"rewards/margins": 318.96673583984375,
"rewards/real": -9.518078804016113,
"step": 102
},
{
"epoch": 0.54,
"grad_norm": 172.94304517672862,
"learning_rate": 4.717741935483871e-07,
"logits/generated": -1.7597293853759766,
"logits/oppo_generated": -2.940918445587158,
"logits/oppo_real": -2.8966355323791504,
"logits/real": -2.1739630699157715,
"logps/generated": -335.61669921875,
"logps/oppo_gen": -59.739017486572266,
"logps/oppo_real": -344.7768249511719,
"logps/real": -356.95220947265625,
"loss": -1.2174,
"loss/gen": 0.9002517461776733,
"loss/real": -1.8782463073730469,
"rewards/accuracies": 1.0,
"rewards/generated": -275.877685546875,
"rewards/margins": 263.70233154296875,
"rewards/real": -12.175359725952148,
"step": 103
},
{
"epoch": 0.54,
"grad_norm": 132.76997113193556,
"learning_rate": 4.7132616487455197e-07,
"logits/generated": -1.8769468069076538,
"logits/oppo_generated": -2.7915775775909424,
"logits/oppo_real": -3.1147103309631348,
"logits/real": -1.7647349834442139,
"logps/generated": -427.8299255371094,
"logps/oppo_gen": -102.62004089355469,
"logps/oppo_real": -260.40576171875,
"logps/real": -333.41436767578125,
"loss": -1.182,
"loss/gen": 0.3168797492980957,
"loss/real": -1.269913911819458,
"rewards/accuracies": 1.0,
"rewards/generated": -325.20989990234375,
"rewards/margins": 252.20127868652344,
"rewards/real": -73.00860595703125,
"step": 104
},
{
"epoch": 0.55,
"grad_norm": 63.105612740666736,
"learning_rate": 4.708781362007168e-07,
"logits/generated": -1.6452438831329346,
"logits/oppo_generated": -2.487020492553711,
"logits/oppo_real": -2.6723856925964355,
"logits/real": -1.5103099346160889,
"logps/generated": -434.71832275390625,
"logps/oppo_gen": -165.93902587890625,
"logps/oppo_real": -273.2274169921875,
"logps/real": -338.7171630859375,
"loss": -1.2914,
"loss/gen": 1.2655643224716187,
"loss/real": -1.3451025485992432,
"rewards/accuracies": 0.875,
"rewards/generated": -268.7793273925781,
"rewards/margins": 203.28956604003906,
"rewards/real": -65.48976135253906,
"step": 105
},
{
"epoch": 0.55,
"grad_norm": 108.93035063315688,
"learning_rate": 4.7043010752688173e-07,
"logits/generated": -1.6793558597564697,
"logits/oppo_generated": -2.73710560798645,
"logits/oppo_real": -2.767047166824341,
"logits/real": -1.719926357269287,
"logps/generated": -647.4390258789062,
"logps/oppo_gen": -92.0302963256836,
"logps/oppo_real": -215.4584503173828,
"logps/real": -281.78326416015625,
"loss": -1.2024,
"loss/gen": 0.2647426724433899,
"loss/real": -1.336751937866211,
"rewards/accuracies": 0.875,
"rewards/generated": -555.40869140625,
"rewards/margins": 489.08392333984375,
"rewards/real": -66.32481384277344,
"step": 106
},
{
"epoch": 0.56,
"grad_norm": 59.14760526838238,
"learning_rate": 4.6998207885304656e-07,
"logits/generated": -1.0961192846298218,
"logits/oppo_generated": -2.6469738483428955,
"logits/oppo_real": -2.294445037841797,
"logits/real": -1.6948471069335938,
"logps/generated": -425.1643981933594,
"logps/oppo_gen": -108.79867553710938,
"logps/oppo_real": -322.5262756347656,
"logps/real": -321.787841796875,
"loss": -1.324,
"loss/gen": 0.8741945028305054,
"loss/real": -2.0073840618133545,
"rewards/accuracies": 1.0,
"rewards/generated": -316.36572265625,
"rewards/margins": 317.1040954589844,
"rewards/real": 0.7384042739868164,
"step": 107
},
{
"epoch": 0.56,
"grad_norm": 168.3731266135573,
"learning_rate": 4.6953405017921144e-07,
"logits/generated": -1.9429898262023926,
"logits/oppo_generated": -2.5572714805603027,
"logits/oppo_real": -2.9609758853912354,
"logits/real": -1.4800224304199219,
"logps/generated": -361.43359375,
"logps/oppo_gen": -79.15040588378906,
"logps/oppo_real": -370.16033935546875,
"logps/real": -355.4413757324219,
"loss": -1.2951,
"loss/gen": 0.8757161498069763,
"loss/real": -2.147189140319824,
"rewards/accuracies": 1.0,
"rewards/generated": -282.2831726074219,
"rewards/margins": 297.0021057128906,
"rewards/real": 14.718932151794434,
"step": 108
},
{
"epoch": 0.57,
"grad_norm": 85.13344591452439,
"learning_rate": 4.690860215053763e-07,
"logits/generated": -1.8185949325561523,
"logits/oppo_generated": -2.818962335586548,
"logits/oppo_real": -2.974072217941284,
"logits/real": -1.8212263584136963,
"logps/generated": -416.0355224609375,
"logps/oppo_gen": -87.5977783203125,
"logps/oppo_real": -245.32896423339844,
"logps/real": -231.43038940429688,
"loss": -1.3389,
"loss/gen": 0.3807252049446106,
"loss/real": -2.1389856338500977,
"rewards/accuracies": 1.0,
"rewards/generated": -328.437744140625,
"rewards/margins": 342.3363037109375,
"rewards/real": 13.898568153381348,
"step": 109
},
{
"epoch": 0.57,
"grad_norm": 83.88629301769151,
"learning_rate": 4.686379928315412e-07,
"logits/generated": -1.616201400756836,
"logits/oppo_generated": -2.633566379547119,
"logits/oppo_real": -2.6259918212890625,
"logits/real": -1.8099052906036377,
"logps/generated": -367.63824462890625,
"logps/oppo_gen": -84.75750732421875,
"logps/oppo_real": -315.4161376953125,
"logps/real": -310.99310302734375,
"loss": -1.3142,
"loss/gen": 1.0110325813293457,
"loss/real": -2.0442302227020264,
"rewards/accuracies": 1.0,
"rewards/generated": -282.8807373046875,
"rewards/margins": 287.30377197265625,
"rewards/real": 4.4230194091796875,
"step": 110
},
{
"epoch": 0.58,
"grad_norm": 105.49686199420815,
"learning_rate": 4.681899641577061e-07,
"logits/generated": -1.8293168544769287,
"logits/oppo_generated": -2.7601919174194336,
"logits/oppo_real": -2.861198902130127,
"logits/real": -1.8698251247406006,
"logps/generated": -464.19207763671875,
"logps/oppo_gen": -63.106407165527344,
"logps/oppo_real": -254.43199157714844,
"logps/real": -266.5618591308594,
"loss": -1.2892,
"loss/gen": 0.18153703212738037,
"loss/real": -1.8787013292312622,
"rewards/accuracies": 1.0,
"rewards/generated": -401.08563232421875,
"rewards/margins": 388.95574951171875,
"rewards/real": -12.129861831665039,
"step": 111
},
{
"epoch": 0.58,
"grad_norm": 122.08849089646002,
"learning_rate": 4.677419354838709e-07,
"logits/generated": -1.6834774017333984,
"logits/oppo_generated": -2.9130988121032715,
"logits/oppo_real": -2.7563557624816895,
"logits/real": -2.015559673309326,
"logps/generated": -434.40545654296875,
"logps/oppo_gen": -71.0981216430664,
"logps/oppo_real": -282.122314453125,
"logps/real": -366.6064453125,
"loss": -1.2773,
"loss/gen": 0.20630814135074615,
"loss/real": -1.1551584005355835,
"rewards/accuracies": 1.0,
"rewards/generated": -363.30731201171875,
"rewards/margins": 278.8231506347656,
"rewards/real": -84.48416137695312,
"step": 112
},
{
"epoch": 0.59,
"grad_norm": 68.69515212721528,
"learning_rate": 4.6729390681003585e-07,
"logits/generated": -1.648697853088379,
"logits/oppo_generated": -2.9668259620666504,
"logits/oppo_real": -2.745316505432129,
"logits/real": -2.1867191791534424,
"logps/generated": -530.75732421875,
"logps/oppo_gen": -77.98722076416016,
"logps/oppo_real": -298.9158020019531,
"logps/real": -301.756103515625,
"loss": -1.4792,
"loss/gen": 0.20026695728302002,
"loss/real": -1.9715969562530518,
"rewards/accuracies": 1.0,
"rewards/generated": -452.77008056640625,
"rewards/margins": 449.92974853515625,
"rewards/real": -2.8403053283691406,
"step": 113
},
{
"epoch": 0.6,
"grad_norm": 57.901015430565856,
"learning_rate": 4.668458781362007e-07,
"logits/generated": -1.9489855766296387,
"logits/oppo_generated": -2.750535726547241,
"logits/oppo_real": -3.053309440612793,
"logits/real": -1.809377670288086,
"logps/generated": -428.41241455078125,
"logps/oppo_gen": -60.58064270019531,
"logps/oppo_real": -289.3247985839844,
"logps/real": -279.5442199707031,
"loss": -1.3882,
"loss/gen": 0.2120557278394699,
"loss/real": -2.097805976867676,
"rewards/accuracies": 1.0,
"rewards/generated": -367.831787109375,
"rewards/margins": 377.6123962402344,
"rewards/real": 9.780599594116211,
"step": 114
},
{
"epoch": 0.6,
"grad_norm": 74.52388262853225,
"learning_rate": 4.6639784946236556e-07,
"logits/generated": -2.027777910232544,
"logits/oppo_generated": -2.857908248901367,
"logits/oppo_real": -2.9202375411987305,
"logits/real": -2.0229392051696777,
"logps/generated": -437.67413330078125,
"logps/oppo_gen": -151.37307739257812,
"logps/oppo_real": -296.711181640625,
"logps/real": -294.72747802734375,
"loss": -1.2855,
"loss/gen": 1.1820727586746216,
"loss/real": -2.019836902618408,
"rewards/accuracies": 1.0,
"rewards/generated": -286.301025390625,
"rewards/margins": 288.28472900390625,
"rewards/real": 1.983699083328247,
"step": 115
},
{
"epoch": 0.61,
"grad_norm": 117.49908576771992,
"learning_rate": 4.6594982078853044e-07,
"logits/generated": -1.4556838274002075,
"logits/oppo_generated": -2.790827751159668,
"logits/oppo_real": -2.5702054500579834,
"logits/real": -1.9723587036132812,
"logps/generated": -398.6170959472656,
"logps/oppo_gen": -72.09220123291016,
"logps/oppo_real": -381.5404357910156,
"logps/real": -404.51959228515625,
"loss": -1.3209,
"loss/gen": 0.30479684472084045,
"loss/real": -1.7702081203460693,
"rewards/accuracies": 1.0,
"rewards/generated": -326.5248718261719,
"rewards/margins": 303.54571533203125,
"rewards/real": -22.979171752929688,
"step": 116
},
{
"epoch": 0.61,
"grad_norm": 65.84271178583474,
"learning_rate": 4.655017921146953e-07,
"logits/generated": -1.4536468982696533,
"logits/oppo_generated": -2.7350287437438965,
"logits/oppo_real": -2.7642884254455566,
"logits/real": -1.7534418106079102,
"logps/generated": -564.8858642578125,
"logps/oppo_gen": -75.31367492675781,
"logps/oppo_real": -265.2264709472656,
"logps/real": -327.7047424316406,
"loss": -1.3234,
"loss/gen": 0.1534099280834198,
"loss/real": -1.3752171993255615,
"rewards/accuracies": 1.0,
"rewards/generated": -489.57220458984375,
"rewards/margins": 427.0939025878906,
"rewards/real": -62.4782829284668,
"step": 117
},
{
"epoch": 0.62,
"grad_norm": 61.410231661624394,
"learning_rate": 4.6505376344086015e-07,
"logits/generated": -1.8642635345458984,
"logits/oppo_generated": -2.9040493965148926,
"logits/oppo_real": -3.1028363704681396,
"logits/real": -1.919235110282898,
"logps/generated": -544.6783447265625,
"logps/oppo_gen": -125.20480346679688,
"logps/oppo_real": -288.18572998046875,
"logps/real": -292.4162902832031,
"loss": -1.5366,
"loss/gen": 0.14077386260032654,
"loss/real": -1.9576942920684814,
"rewards/accuracies": 1.0,
"rewards/generated": -419.47357177734375,
"rewards/margins": 415.24298095703125,
"rewards/real": -4.230566024780273,
"step": 118
},
{
"epoch": 0.62,
"grad_norm": 81.7004759682433,
"learning_rate": 4.646057347670251e-07,
"logits/generated": -1.400865077972412,
"logits/oppo_generated": -2.81662917137146,
"logits/oppo_real": -2.771235227584839,
"logits/real": -1.9147589206695557,
"logps/generated": -483.5452880859375,
"logps/oppo_gen": -75.8317642211914,
"logps/oppo_real": -309.955078125,
"logps/real": -345.3260498046875,
"loss": -1.477,
"loss/gen": 0.14227358996868134,
"loss/real": -1.6462900638580322,
"rewards/accuracies": 1.0,
"rewards/generated": -407.7135009765625,
"rewards/margins": 372.342529296875,
"rewards/real": -35.37098693847656,
"step": 119
},
{
"epoch": 0.63,
"grad_norm": 76.67643945298647,
"learning_rate": 4.641577060931899e-07,
"logits/generated": -1.515089511871338,
"logits/oppo_generated": -2.6174123287200928,
"logits/oppo_real": -2.623584747314453,
"logits/real": -1.833693265914917,
"logps/generated": -522.1652221679688,
"logps/oppo_gen": -89.05635070800781,
"logps/oppo_real": -326.333251953125,
"logps/real": -330.11474609375,
"loss": -1.4459,
"loss/gen": 0.16859720647335052,
"loss/real": -1.9621846675872803,
"rewards/accuracies": 1.0,
"rewards/generated": -433.10888671875,
"rewards/margins": 429.32733154296875,
"rewards/real": -3.7815260887145996,
"step": 120
},
{
"epoch": 0.63,
"grad_norm": 82.81281249941038,
"learning_rate": 4.637096774193548e-07,
"logits/generated": -1.8157904148101807,
"logits/oppo_generated": -3.0457491874694824,
"logits/oppo_real": -2.8690385818481445,
"logits/real": -2.337892532348633,
"logps/generated": -435.04327392578125,
"logps/oppo_gen": -73.73023986816406,
"logps/oppo_real": -327.18359375,
"logps/real": -325.4014892578125,
"loss": -1.5047,
"loss/gen": 0.7044680118560791,
"loss/real": -2.0178208351135254,
"rewards/accuracies": 0.875,
"rewards/generated": -361.31298828125,
"rewards/margins": 363.0950927734375,
"rewards/real": 1.7820682525634766,
"step": 121
},
{
"epoch": 0.64,
"grad_norm": 125.18439524357659,
"learning_rate": 4.6326164874551973e-07,
"logits/generated": -1.8496112823486328,
"logits/oppo_generated": -2.835294246673584,
"logits/oppo_real": -2.8998498916625977,
"logits/real": -2.04846453666687,
"logps/generated": -464.38885498046875,
"logps/oppo_gen": -70.26353454589844,
"logps/oppo_real": -340.30975341796875,
"logps/real": -354.45721435546875,
"loss": -1.4469,
"loss/gen": 0.40815508365631104,
"loss/real": -1.858525276184082,
"rewards/accuracies": 1.0,
"rewards/generated": -394.12530517578125,
"rewards/margins": 379.97784423828125,
"rewards/real": -14.14747428894043,
"step": 122
},
{
"epoch": 0.64,
"grad_norm": 49.26206179629386,
"learning_rate": 4.6281362007168456e-07,
"logits/generated": -1.7525596618652344,
"logits/oppo_generated": -2.7462942600250244,
"logits/oppo_real": -2.756624221801758,
"logits/real": -1.8544926643371582,
"logps/generated": -438.41241455078125,
"logps/oppo_gen": -43.12284851074219,
"logps/oppo_real": -88.672607421875,
"logps/real": -175.98345947265625,
"loss": -1.4557,
"loss/gen": 0.6103305816650391,
"loss/real": -1.1268913745880127,
"rewards/accuracies": 0.75,
"rewards/generated": -395.28961181640625,
"rewards/margins": 307.978759765625,
"rewards/real": -87.31085968017578,
"step": 123
},
{
"epoch": 0.65,
"grad_norm": 61.709413850825875,
"learning_rate": 4.6236559139784944e-07,
"logits/generated": -1.9703348875045776,
"logits/oppo_generated": -3.2303848266601562,
"logits/oppo_real": -3.089721918106079,
"logits/real": -2.517977714538574,
"logps/generated": -519.572021484375,
"logps/oppo_gen": -85.11558532714844,
"logps/oppo_real": -363.27288818359375,
"logps/real": -372.19915771484375,
"loss": -1.6645,
"loss/gen": 0.11303215473890305,
"loss/real": -1.9107370376586914,
"rewards/accuracies": 1.0,
"rewards/generated": -434.4564208984375,
"rewards/margins": 425.5301513671875,
"rewards/real": -8.92629623413086,
"step": 124
},
{
"epoch": 0.65,
"grad_norm": 88.81122448063645,
"learning_rate": 4.619175627240143e-07,
"logits/generated": -1.938302993774414,
"logits/oppo_generated": -2.8577804565429688,
"logits/oppo_real": -2.9372658729553223,
"logits/real": -2.0496668815612793,
"logps/generated": -748.061767578125,
"logps/oppo_gen": -77.513916015625,
"logps/oppo_real": -263.41583251953125,
"logps/real": -329.33160400390625,
"loss": -1.5934,
"loss/gen": 0.13084131479263306,
"loss/real": -1.3408421277999878,
"rewards/accuracies": 1.0,
"rewards/generated": -670.5478515625,
"rewards/margins": 604.6320190429688,
"rewards/real": -65.91577911376953,
"step": 125
},
{
"epoch": 0.66,
"grad_norm": 83.68007225252317,
"learning_rate": 4.614695340501792e-07,
"logits/generated": -1.7787394523620605,
"logits/oppo_generated": -2.7984981536865234,
"logits/oppo_real": -2.694584369659424,
"logits/real": -2.0291075706481934,
"logps/generated": -396.68927001953125,
"logps/oppo_gen": -49.8719596862793,
"logps/oppo_real": -201.35671997070312,
"logps/real": -242.06234741210938,
"loss": -1.5737,
"loss/gen": 0.6225491762161255,
"loss/real": -1.5929436683654785,
"rewards/accuracies": 1.0,
"rewards/generated": -346.81732177734375,
"rewards/margins": 306.1116943359375,
"rewards/real": -40.70562744140625,
"step": 126
},
{
"epoch": 0.66,
"grad_norm": 66.71267170790156,
"learning_rate": 4.6102150537634403e-07,
"logits/generated": -2.229249954223633,
"logits/oppo_generated": -2.8614678382873535,
"logits/oppo_real": -3.104336738586426,
"logits/real": -2.1488466262817383,
"logps/generated": -631.5731201171875,
"logps/oppo_gen": -65.24995422363281,
"logps/oppo_real": -279.1671142578125,
"logps/real": -288.8418273925781,
"loss": -1.5765,
"loss/gen": 0.17149776220321655,
"loss/real": -1.9032527208328247,
"rewards/accuracies": 1.0,
"rewards/generated": -566.3231201171875,
"rewards/margins": 556.6484375,
"rewards/real": -9.67473030090332,
"step": 127
},
{
"epoch": 0.67,
"grad_norm": 169.06562497111884,
"learning_rate": 4.6057347670250897e-07,
"logits/generated": -1.8923313617706299,
"logits/oppo_generated": -2.743807554244995,
"logits/oppo_real": -2.8210201263427734,
"logits/real": -2.0257139205932617,
"logps/generated": -499.35198974609375,
"logps/oppo_gen": -89.97515869140625,
"logps/oppo_real": -332.08160400390625,
"logps/real": -333.4979248046875,
"loss": -1.5969,
"loss/gen": 0.5354217886924744,
"loss/real": -1.9858367443084717,
"rewards/accuracies": 1.0,
"rewards/generated": -409.3768615722656,
"rewards/margins": 407.96051025390625,
"rewards/real": -1.4163341522216797,
"step": 128
},
{
"epoch": 0.67,
"grad_norm": 46.82037963556842,
"learning_rate": 4.601254480286738e-07,
"logits/generated": -1.4846677780151367,
"logits/oppo_generated": -2.88552188873291,
"logits/oppo_real": -2.462414264678955,
"logits/real": -2.096205472946167,
"logps/generated": -536.2479248046875,
"logps/oppo_gen": -83.80229949951172,
"logps/oppo_real": -202.01084899902344,
"logps/real": -235.54244995117188,
"loss": -1.6622,
"loss/gen": 0.28287458419799805,
"loss/real": -1.6646840572357178,
"rewards/accuracies": 0.875,
"rewards/generated": -452.4456481933594,
"rewards/margins": 418.9140625,
"rewards/real": -33.531593322753906,
"step": 129
},
{
"epoch": 0.68,
"grad_norm": 62.38898539042669,
"learning_rate": 4.596774193548387e-07,
"logits/generated": -1.2318034172058105,
"logits/oppo_generated": -2.284450054168701,
"logits/oppo_real": -2.3912582397460938,
"logits/real": -1.4682029485702515,
"logps/generated": -635.2818603515625,
"logps/oppo_gen": -65.28082275390625,
"logps/oppo_real": -298.7229919433594,
"logps/real": -261.49755859375,
"loss": -1.6128,
"loss/gen": 0.3564888834953308,
"loss/real": -2.3722541332244873,
"rewards/accuracies": 1.0,
"rewards/generated": -570.0010986328125,
"rewards/margins": 607.2265625,
"rewards/real": 37.22542953491211,
"step": 130
},
{
"epoch": 0.68,
"grad_norm": 125.28287523889688,
"learning_rate": 4.5922939068100356e-07,
"logits/generated": -1.7798149585723877,
"logits/oppo_generated": -2.862884044647217,
"logits/oppo_real": -3.036806344985962,
"logits/real": -1.983008861541748,
"logps/generated": -748.7784423828125,
"logps/oppo_gen": -90.15806579589844,
"logps/oppo_real": -274.20635986328125,
"logps/real": -275.6498718261719,
"loss": -1.5349,
"loss/gen": 0.04587027058005333,
"loss/real": -1.985565185546875,
"rewards/accuracies": 1.0,
"rewards/generated": -658.620361328125,
"rewards/margins": 657.1768798828125,
"rewards/real": -1.4434819221496582,
"step": 131
},
{
"epoch": 0.69,
"grad_norm": 66.76624114798773,
"learning_rate": 4.5878136200716844e-07,
"logits/generated": -1.8081977367401123,
"logits/oppo_generated": -2.962895393371582,
"logits/oppo_real": -2.877319812774658,
"logits/real": -2.0002503395080566,
"logps/generated": -730.475341796875,
"logps/oppo_gen": -100.83236694335938,
"logps/oppo_real": -181.53245544433594,
"logps/real": -265.831298828125,
"loss": -1.5555,
"loss/gen": 0.8545611500740051,
"loss/real": -1.1570115089416504,
"rewards/accuracies": 0.875,
"rewards/generated": -629.6429443359375,
"rewards/margins": 545.3441162109375,
"rewards/real": -84.29884338378906,
"step": 132
},
{
"epoch": 0.69,
"grad_norm": 58.158852709088926,
"learning_rate": 4.5833333333333327e-07,
"logits/generated": -1.9816755056381226,
"logits/oppo_generated": -2.6472039222717285,
"logits/oppo_real": -2.741997241973877,
"logits/real": -1.9651538133621216,
"logps/generated": -828.5115356445312,
"logps/oppo_gen": -144.51702880859375,
"logps/oppo_real": -452.9317626953125,
"logps/real": -480.84735107421875,
"loss": -1.7693,
"loss/gen": 0.09539352357387543,
"loss/real": -1.720844030380249,
"rewards/accuracies": 1.0,
"rewards/generated": -683.9945068359375,
"rewards/margins": 656.0789184570312,
"rewards/real": -27.915592193603516,
"step": 133
},
{
"epoch": 0.7,
"grad_norm": 90.54872868033543,
"learning_rate": 4.578853046594982e-07,
"logits/generated": -2.1033763885498047,
"logits/oppo_generated": -2.739530086517334,
"logits/oppo_real": -2.9463746547698975,
"logits/real": -2.074946880340576,
"logps/generated": -545.0913696289062,
"logps/oppo_gen": -90.12626647949219,
"logps/oppo_real": -418.7986755371094,
"logps/real": -383.2454528808594,
"loss": -1.6797,
"loss/gen": 0.09754064679145813,
"loss/real": -2.35553240776062,
"rewards/accuracies": 1.0,
"rewards/generated": -454.965087890625,
"rewards/margins": 490.51837158203125,
"rewards/real": 35.55324935913086,
"step": 134
},
{
"epoch": 0.7,
"grad_norm": 83.66236669831007,
"learning_rate": 4.574372759856631e-07,
"logits/generated": -1.986171007156372,
"logits/oppo_generated": -2.7536940574645996,
"logits/oppo_real": -3.0076608657836914,
"logits/real": -2.0253145694732666,
"logps/generated": -483.1329345703125,
"logps/oppo_gen": -57.10042190551758,
"logps/oppo_real": -238.064697265625,
"logps/real": -238.10501098632812,
"loss": -1.6896,
"loss/gen": 0.1678832322359085,
"loss/real": -1.9995965957641602,
"rewards/accuracies": 1.0,
"rewards/generated": -426.032470703125,
"rewards/margins": 425.9921569824219,
"rewards/real": -0.04033064842224121,
"step": 135
},
{
"epoch": 0.71,
"grad_norm": 67.85836942324248,
"learning_rate": 4.569892473118279e-07,
"logits/generated": -1.7882883548736572,
"logits/oppo_generated": -2.4256725311279297,
"logits/oppo_real": -2.8077471256256104,
"logits/real": -1.5553542375564575,
"logps/generated": -575.3634643554688,
"logps/oppo_gen": -58.635196685791016,
"logps/oppo_real": -250.21864318847656,
"logps/real": -216.97225952148438,
"loss": -1.7029,
"loss/gen": 0.1162588894367218,
"loss/real": -2.3324639797210693,
"rewards/accuracies": 1.0,
"rewards/generated": -516.728271484375,
"rewards/margins": 549.974609375,
"rewards/real": 33.24639892578125,
"step": 136
},
{
"epoch": 0.72,
"grad_norm": 73.62041051060501,
"learning_rate": 4.5654121863799285e-07,
"logits/generated": -1.89347505569458,
"logits/oppo_generated": -2.624286651611328,
"logits/oppo_real": -2.8371405601501465,
"logits/real": -1.9667630195617676,
"logps/generated": -548.8859252929688,
"logps/oppo_gen": -106.68203735351562,
"logps/oppo_real": -485.33148193359375,
"logps/real": -392.25909423828125,
"loss": -1.6779,
"loss/gen": 0.1170816719532013,
"loss/real": -2.9307241439819336,
"rewards/accuracies": 1.0,
"rewards/generated": -442.203857421875,
"rewards/margins": 535.2762451171875,
"rewards/real": 93.0723876953125,
"step": 137
},
{
"epoch": 0.72,
"grad_norm": 82.21296242932893,
"learning_rate": 4.560931899641577e-07,
"logits/generated": -1.917464256286621,
"logits/oppo_generated": -2.9165024757385254,
"logits/oppo_real": -2.8914356231689453,
"logits/real": -2.142932415008545,
"logps/generated": -1235.953369140625,
"logps/oppo_gen": -83.79830169677734,
"logps/oppo_real": -193.01220703125,
"logps/real": -226.86917114257812,
"loss": -1.7045,
"loss/gen": 0.0816095620393753,
"loss/real": -1.6614303588867188,
"rewards/accuracies": 1.0,
"rewards/generated": -1152.155029296875,
"rewards/margins": 1118.2979736328125,
"rewards/real": -33.856971740722656,
"step": 138
},
{
"epoch": 0.73,
"grad_norm": 54.73798339518798,
"learning_rate": 4.5564516129032256e-07,
"logits/generated": -2.137822389602661,
"logits/oppo_generated": -2.9398818016052246,
"logits/oppo_real": -3.185572624206543,
"logits/real": -2.284759044647217,
"logps/generated": -675.8824462890625,
"logps/oppo_gen": -117.77006530761719,
"logps/oppo_real": -377.13311767578125,
"logps/real": -368.2445373535156,
"loss": -1.8385,
"loss/gen": 0.03665899857878685,
"loss/real": -2.08888578414917,
"rewards/accuracies": 1.0,
"rewards/generated": -558.1124267578125,
"rewards/margins": 567.0009765625,
"rewards/real": 8.888594627380371,
"step": 139
},
{
"epoch": 0.73,
"grad_norm": 63.7378626619904,
"learning_rate": 4.5519713261648744e-07,
"logits/generated": -1.9477636814117432,
"logits/oppo_generated": -2.771664619445801,
"logits/oppo_real": -2.8211355209350586,
"logits/real": -2.0558881759643555,
"logps/generated": -591.1102294921875,
"logps/oppo_gen": -88.96675109863281,
"logps/oppo_real": -326.7928466796875,
"logps/real": -351.2818603515625,
"loss": -1.7565,
"loss/gen": 0.07419906556606293,
"loss/real": -1.7551099061965942,
"rewards/accuracies": 1.0,
"rewards/generated": -502.1435241699219,
"rewards/margins": 477.6545104980469,
"rewards/real": -24.489017486572266,
"step": 140
},
{
"epoch": 0.74,
"grad_norm": 102.74617921728282,
"learning_rate": 4.547491039426523e-07,
"logits/generated": -1.9030685424804688,
"logits/oppo_generated": -2.7817769050598145,
"logits/oppo_real": -2.727473258972168,
"logits/real": -2.137336492538452,
"logps/generated": -511.839599609375,
"logps/oppo_gen": -66.81544494628906,
"logps/oppo_real": -323.5364685058594,
"logps/real": -293.64990234375,
"loss": -1.7376,
"loss/gen": 0.10144515335559845,
"loss/real": -2.29886531829834,
"rewards/accuracies": 1.0,
"rewards/generated": -445.0241394042969,
"rewards/margins": 474.9106750488281,
"rewards/real": 29.886547088623047,
"step": 141
},
{
"epoch": 0.74,
"grad_norm": 72.97101508797003,
"learning_rate": 4.5430107526881715e-07,
"logits/generated": -2.1376824378967285,
"logits/oppo_generated": -2.7992939949035645,
"logits/oppo_real": -2.914294719696045,
"logits/real": -2.1265478134155273,
"logps/generated": -582.9034423828125,
"logps/oppo_gen": -68.68360900878906,
"logps/oppo_real": -253.99221801757812,
"logps/real": -252.176025390625,
"loss": -1.7525,
"loss/gen": 0.07931329309940338,
"loss/real": -2.018162250518799,
"rewards/accuracies": 1.0,
"rewards/generated": -514.2198486328125,
"rewards/margins": 516.0361328125,
"rewards/real": 1.8162250518798828,
"step": 142
},
{
"epoch": 0.75,
"grad_norm": 84.03182525835078,
"learning_rate": 4.538530465949821e-07,
"logits/generated": -1.9512498378753662,
"logits/oppo_generated": -2.6728546619415283,
"logits/oppo_real": -2.74894380569458,
"logits/real": -1.980553388595581,
"logps/generated": -495.83770751953125,
"logps/oppo_gen": -80.91419982910156,
"logps/oppo_real": -178.15316772460938,
"logps/real": -180.73532104492188,
"loss": -1.6561,
"loss/gen": 0.21434536576271057,
"loss/real": -1.9741783142089844,
"rewards/accuracies": 1.0,
"rewards/generated": -414.92352294921875,
"rewards/margins": 412.34136962890625,
"rewards/real": -2.5821542739868164,
"step": 143
},
{
"epoch": 0.75,
"grad_norm": 122.909519566224,
"learning_rate": 4.534050179211469e-07,
"logits/generated": -1.907271385192871,
"logits/oppo_generated": -2.649775981903076,
"logits/oppo_real": -2.8270368576049805,
"logits/real": -1.920291543006897,
"logps/generated": -488.8565673828125,
"logps/oppo_gen": -64.18344116210938,
"logps/oppo_real": -235.9340057373047,
"logps/real": -287.48065185546875,
"loss": -1.602,
"loss/gen": 0.21500566601753235,
"loss/real": -1.4845335483551025,
"rewards/accuracies": 1.0,
"rewards/generated": -424.673095703125,
"rewards/margins": 373.12646484375,
"rewards/real": -51.5466423034668,
"step": 144
},
{
"epoch": 0.76,
"grad_norm": 96.94029553512564,
"learning_rate": 4.529569892473118e-07,
"logits/generated": -2.026323080062866,
"logits/oppo_generated": -2.943478584289551,
"logits/oppo_real": -2.859900951385498,
"logits/real": -2.257244825363159,
"logps/generated": -603.081787109375,
"logps/oppo_gen": -85.38736724853516,
"logps/oppo_real": -270.22747802734375,
"logps/real": -247.45028686523438,
"loss": -1.6835,
"loss/gen": 0.0911005511879921,
"loss/real": -2.2277719974517822,
"rewards/accuracies": 1.0,
"rewards/generated": -517.6943969726562,
"rewards/margins": 540.4715576171875,
"rewards/real": 22.777204513549805,
"step": 145
},
{
"epoch": 0.76,
"grad_norm": 65.43579495749059,
"learning_rate": 4.5250896057347673e-07,
"logits/generated": -2.3204407691955566,
"logits/oppo_generated": -2.9215641021728516,
"logits/oppo_real": -3.185364246368408,
"logits/real": -2.2590651512145996,
"logps/generated": -651.804443359375,
"logps/oppo_gen": -160.45762634277344,
"logps/oppo_real": -471.71771240234375,
"logps/real": -498.5760803222656,
"loss": -1.7785,
"loss/gen": 0.08252400159835815,
"loss/real": -1.7314162254333496,
"rewards/accuracies": 1.0,
"rewards/generated": -491.3468322753906,
"rewards/margins": 464.48846435546875,
"rewards/real": -26.858369827270508,
"step": 146
},
{
"epoch": 0.77,
"grad_norm": 97.98889346343033,
"learning_rate": 4.5206093189964156e-07,
"logits/generated": -2.0279428958892822,
"logits/oppo_generated": -2.8301095962524414,
"logits/oppo_real": -2.9588708877563477,
"logits/real": -1.925227403640747,
"logps/generated": -647.3221435546875,
"logps/oppo_gen": -65.44461059570312,
"logps/oppo_real": -184.59007263183594,
"logps/real": -213.09719848632812,
"loss": -1.7926,
"loss/gen": 0.09245370328426361,
"loss/real": -1.7149286270141602,
"rewards/accuracies": 1.0,
"rewards/generated": -581.87744140625,
"rewards/margins": 553.3703002929688,
"rewards/real": -28.50714111328125,
"step": 147
},
{
"epoch": 0.77,
"grad_norm": 101.17580711470751,
"learning_rate": 4.5161290322580644e-07,
"logits/generated": -1.8266165256500244,
"logits/oppo_generated": -3.001574993133545,
"logits/oppo_real": -2.8634276390075684,
"logits/real": -2.2667245864868164,
"logps/generated": -614.9871215820312,
"logps/oppo_gen": -83.24380493164062,
"logps/oppo_real": -339.7986755371094,
"logps/real": -321.4783935546875,
"loss": -1.7798,
"loss/gen": 0.04210636392235756,
"loss/real": -2.1832029819488525,
"rewards/accuracies": 1.0,
"rewards/generated": -531.7432861328125,
"rewards/margins": 550.0635986328125,
"rewards/real": 18.320310592651367,
"step": 148
},
{
"epoch": 0.78,
"grad_norm": 125.15491182180986,
"learning_rate": 4.511648745519713e-07,
"logits/generated": -1.8712575435638428,
"logits/oppo_generated": -2.7472705841064453,
"logits/oppo_real": -2.8078293800354004,
"logits/real": -1.9940369129180908,
"logps/generated": -590.0716552734375,
"logps/oppo_gen": -87.50840759277344,
"logps/oppo_real": -388.77752685546875,
"logps/real": -361.9477233886719,
"loss": -1.8277,
"loss/gen": 0.14459219574928284,
"loss/real": -2.2682981491088867,
"rewards/accuracies": 1.0,
"rewards/generated": -502.56329345703125,
"rewards/margins": 529.39306640625,
"rewards/real": 26.829811096191406,
"step": 149
},
{
"epoch": 0.78,
"grad_norm": 130.95518973584055,
"learning_rate": 4.507168458781362e-07,
"logits/generated": -1.9332165718078613,
"logits/oppo_generated": -2.7329964637756348,
"logits/oppo_real": -2.917022705078125,
"logits/real": -1.8709020614624023,
"logps/generated": -633.012939453125,
"logps/oppo_gen": -51.14801788330078,
"logps/oppo_real": -223.8541717529297,
"logps/real": -240.81948852539062,
"loss": -1.7284,
"loss/gen": 0.03336421027779579,
"loss/real": -1.830346703529358,
"rewards/accuracies": 1.0,
"rewards/generated": -581.8649291992188,
"rewards/margins": 564.8995971679688,
"rewards/real": -16.965333938598633,
"step": 150
},
{
"epoch": 0.79,
"grad_norm": 72.55144659029033,
"learning_rate": 4.5026881720430103e-07,
"logits/generated": -1.6202969551086426,
"logits/oppo_generated": -2.933967113494873,
"logits/oppo_real": -2.779536247253418,
"logits/real": -2.1590824127197266,
"logps/generated": -601.2861328125,
"logps/oppo_gen": -59.4964599609375,
"logps/oppo_real": -305.5668029785156,
"logps/real": -321.80731201171875,
"loss": -1.6793,
"loss/gen": 0.09511305391788483,
"loss/real": -1.8375946283340454,
"rewards/accuracies": 1.0,
"rewards/generated": -541.7896728515625,
"rewards/margins": 525.5491333007812,
"rewards/real": -16.240537643432617,
"step": 151
},
{
"epoch": 0.79,
"grad_norm": 60.15745737424725,
"learning_rate": 4.4982078853046596e-07,
"logits/generated": -1.8742468357086182,
"logits/oppo_generated": -2.7897162437438965,
"logits/oppo_real": -2.9050936698913574,
"logits/real": -1.9126145839691162,
"logps/generated": -610.4820556640625,
"logps/oppo_gen": -68.6431884765625,
"logps/oppo_real": -279.5192565917969,
"logps/real": -256.11859130859375,
"loss": -1.8501,
"loss/gen": 0.03932388871908188,
"loss/real": -2.234006881713867,
"rewards/accuracies": 1.0,
"rewards/generated": -541.8388671875,
"rewards/margins": 565.239501953125,
"rewards/real": 23.400684356689453,
"step": 152
},
{
"epoch": 0.8,
"grad_norm": 51.045033636822836,
"learning_rate": 4.493727598566308e-07,
"logits/generated": -2.009788990020752,
"logits/oppo_generated": -2.852003574371338,
"logits/oppo_real": -3.0418591499328613,
"logits/real": -2.0836453437805176,
"logps/generated": -653.099365234375,
"logps/oppo_gen": -87.85763549804688,
"logps/oppo_real": -353.83575439453125,
"logps/real": -326.4531555175781,
"loss": -1.8601,
"loss/gen": 0.03006863407790661,
"loss/real": -2.2738256454467773,
"rewards/accuracies": 1.0,
"rewards/generated": -565.2417602539062,
"rewards/margins": 592.6243896484375,
"rewards/real": 27.38258934020996,
"step": 153
},
{
"epoch": 0.8,
"grad_norm": 38.445754639552696,
"learning_rate": 4.489247311827957e-07,
"logits/generated": -1.6755175590515137,
"logits/oppo_generated": -2.8767549991607666,
"logits/oppo_real": -2.8858747482299805,
"logits/real": -2.09330677986145,
"logps/generated": -611.26708984375,
"logps/oppo_gen": -81.34989166259766,
"logps/oppo_real": -298.17315673828125,
"logps/real": -290.9176025390625,
"loss": -1.8951,
"loss/gen": 0.3788173794746399,
"loss/real": -2.0725557804107666,
"rewards/accuracies": 0.875,
"rewards/generated": -529.917236328125,
"rewards/margins": 537.1728515625,
"rewards/real": 7.255581855773926,
"step": 154
},
{
"epoch": 0.81,
"grad_norm": 59.257956183906224,
"learning_rate": 4.4847670250896056e-07,
"logits/generated": -1.944122314453125,
"logits/oppo_generated": -2.9232120513916016,
"logits/oppo_real": -2.9117484092712402,
"logits/real": -1.9138293266296387,
"logps/generated": -694.2562255859375,
"logps/oppo_gen": -70.24262237548828,
"logps/oppo_real": -278.1219482421875,
"logps/real": -255.45455932617188,
"loss": -1.6892,
"loss/gen": 0.13799193501472473,
"loss/real": -2.2266738414764404,
"rewards/accuracies": 1.0,
"rewards/generated": -624.0136108398438,
"rewards/margins": 646.6810302734375,
"rewards/real": 22.667388916015625,
"step": 155
},
{
"epoch": 0.81,
"grad_norm": 47.609623707095714,
"learning_rate": 4.4802867383512544e-07,
"logits/generated": -0.9755500555038452,
"logits/oppo_generated": -2.88938045501709,
"logits/oppo_real": -2.7122931480407715,
"logits/real": -1.8035005331039429,
"logps/generated": -692.7864379882812,
"logps/oppo_gen": -54.18265914916992,
"logps/oppo_real": -184.002197265625,
"logps/real": -166.90866088867188,
"loss": -1.8166,
"loss/gen": 0.05148601904511452,
"loss/real": -2.1709353923797607,
"rewards/accuracies": 1.0,
"rewards/generated": -638.603759765625,
"rewards/margins": 655.6973266601562,
"rewards/real": 17.09354019165039,
"step": 156
},
{
"epoch": 0.82,
"grad_norm": 58.80905235311886,
"learning_rate": 4.475806451612903e-07,
"logits/generated": -0.8548814654350281,
"logits/oppo_generated": -2.9243669509887695,
"logits/oppo_real": -2.7173829078674316,
"logits/real": -1.9759352207183838,
"logps/generated": -570.697021484375,
"logps/oppo_gen": -67.00720977783203,
"logps/oppo_real": -217.82373046875,
"logps/real": -219.30230712890625,
"loss": -1.6971,
"loss/gen": 0.40054354071617126,
"loss/real": -1.9852139949798584,
"rewards/accuracies": 1.0,
"rewards/generated": -503.6898498535156,
"rewards/margins": 502.21124267578125,
"rewards/real": -1.4785995483398438,
"step": 157
},
{
"epoch": 0.83,
"grad_norm": 47.13598957985156,
"learning_rate": 4.4713261648745515e-07,
"logits/generated": -1.7058589458465576,
"logits/oppo_generated": -2.831021308898926,
"logits/oppo_real": -2.949223756790161,
"logits/real": -1.647479772567749,
"logps/generated": -702.8096923828125,
"logps/oppo_gen": -56.023048400878906,
"logps/oppo_real": -286.0043640136719,
"logps/real": -275.82373046875,
"loss": -1.8097,
"loss/gen": 0.04578549787402153,
"loss/real": -2.101806640625,
"rewards/accuracies": 1.0,
"rewards/generated": -646.7866821289062,
"rewards/margins": 656.9673461914062,
"rewards/real": 10.180654525756836,
"step": 158
},
{
"epoch": 0.83,
"grad_norm": 46.640897193510604,
"learning_rate": 4.466845878136201e-07,
"logits/generated": -1.0372296571731567,
"logits/oppo_generated": -2.9866466522216797,
"logits/oppo_real": -2.8740952014923096,
"logits/real": -2.0625128746032715,
"logps/generated": -717.514892578125,
"logps/oppo_gen": -74.18051147460938,
"logps/oppo_real": -289.81561279296875,
"logps/real": -262.28466796875,
"loss": -1.9636,
"loss/gen": 0.028024822473526,
"loss/real": -2.2753095626831055,
"rewards/accuracies": 1.0,
"rewards/generated": -643.3343505859375,
"rewards/margins": 670.8653564453125,
"rewards/real": 27.53096580505371,
"step": 159
},
{
"epoch": 0.84,
"grad_norm": 86.45016402167784,
"learning_rate": 4.462365591397849e-07,
"logits/generated": -1.1898678541183472,
"logits/oppo_generated": -2.7843871116638184,
"logits/oppo_real": -2.832613945007324,
"logits/real": -1.442640781402588,
"logps/generated": -915.6947631835938,
"logps/oppo_gen": -64.36344909667969,
"logps/oppo_real": -354.620361328125,
"logps/real": -332.0544738769531,
"loss": -1.9388,
"loss/gen": 0.12847158312797546,
"loss/real": -2.225658655166626,
"rewards/accuracies": 1.0,
"rewards/generated": -851.3313598632812,
"rewards/margins": 873.897216796875,
"rewards/real": 22.56588363647461,
"step": 160
},
{
"epoch": 0.84,
"grad_norm": 60.7973006602935,
"learning_rate": 4.457885304659498e-07,
"logits/generated": -1.986168622970581,
"logits/oppo_generated": -2.730388879776001,
"logits/oppo_real": -2.9984025955200195,
"logits/real": -1.6806275844573975,
"logps/generated": -885.4449462890625,
"logps/oppo_gen": -143.77706909179688,
"logps/oppo_real": -439.7186279296875,
"logps/real": -421.04217529296875,
"loss": -1.8178,
"loss/gen": 0.028874732553958893,
"loss/real": -2.1867642402648926,
"rewards/accuracies": 1.0,
"rewards/generated": -741.6678466796875,
"rewards/margins": 760.34423828125,
"rewards/real": 18.676427841186523,
"step": 161
},
{
"epoch": 0.85,
"grad_norm": 84.26484997557552,
"learning_rate": 4.4534050179211467e-07,
"logits/generated": -1.9005441665649414,
"logits/oppo_generated": -2.915806770324707,
"logits/oppo_real": -3.1570920944213867,
"logits/real": -1.8480937480926514,
"logps/generated": -750.3797607421875,
"logps/oppo_gen": -118.90010070800781,
"logps/oppo_real": -341.41363525390625,
"logps/real": -317.5443115234375,
"loss": -1.8443,
"loss/gen": 0.14120692014694214,
"loss/real": -2.2386932373046875,
"rewards/accuracies": 1.0,
"rewards/generated": -631.4796142578125,
"rewards/margins": 655.3489990234375,
"rewards/real": 23.86932373046875,
"step": 162
},
{
"epoch": 0.85,
"grad_norm": 143.02717726886732,
"learning_rate": 4.4489247311827955e-07,
"logits/generated": -1.4650170803070068,
"logits/oppo_generated": -2.837372303009033,
"logits/oppo_real": -2.9020771980285645,
"logits/real": -1.7127195596694946,
"logps/generated": -478.16339111328125,
"logps/oppo_gen": -67.94302368164062,
"logps/oppo_real": -255.73797607421875,
"logps/real": -229.78802490234375,
"loss": -1.8535,
"loss/gen": 0.3787376582622528,
"loss/real": -2.2594995498657227,
"rewards/accuracies": 1.0,
"rewards/generated": -410.22039794921875,
"rewards/margins": 436.1703186035156,
"rewards/real": 25.949939727783203,
"step": 163
},
{
"epoch": 0.86,
"grad_norm": 62.02056150042601,
"learning_rate": 4.444444444444444e-07,
"logits/generated": -1.8700604438781738,
"logits/oppo_generated": -2.756680727005005,
"logits/oppo_real": -3.0085153579711914,
"logits/real": -1.58890962600708,
"logps/generated": -702.9028930664062,
"logps/oppo_gen": -85.88131713867188,
"logps/oppo_real": -249.00379943847656,
"logps/real": -246.01797485351562,
"loss": -1.8894,
"loss/gen": 0.031073393300175667,
"loss/real": -2.029858350753784,
"rewards/accuracies": 1.0,
"rewards/generated": -617.0215454101562,
"rewards/margins": 620.0074462890625,
"rewards/real": 2.9858341217041016,
"step": 164
},
{
"epoch": 0.86,
"grad_norm": 61.25214042816177,
"learning_rate": 4.439964157706093e-07,
"logits/generated": -0.981428325176239,
"logits/oppo_generated": -2.7159523963928223,
"logits/oppo_real": -2.960238218307495,
"logits/real": -1.0548228025436401,
"logps/generated": -598.4140625,
"logps/oppo_gen": -58.635005950927734,
"logps/oppo_real": -400.1387634277344,
"logps/real": -425.5389404296875,
"loss": -1.8947,
"loss/gen": 0.1158125251531601,
"loss/real": -1.7459979057312012,
"rewards/accuracies": 1.0,
"rewards/generated": -539.779052734375,
"rewards/margins": 514.37890625,
"rewards/real": -25.400211334228516,
"step": 165
},
{
"epoch": 0.87,
"grad_norm": 81.0536118151514,
"learning_rate": 4.4354838709677415e-07,
"logits/generated": -1.4463560581207275,
"logits/oppo_generated": -2.678307294845581,
"logits/oppo_real": -2.8269057273864746,
"logits/real": -1.380929708480835,
"logps/generated": -846.7905883789062,
"logps/oppo_gen": -67.97695922851562,
"logps/oppo_real": -219.36227416992188,
"logps/real": -325.5843505859375,
"loss": -1.93,
"loss/gen": 0.5336862206459045,
"loss/real": -0.9377790689468384,
"rewards/accuracies": 1.0,
"rewards/generated": -778.8135986328125,
"rewards/margins": 672.591552734375,
"rewards/real": -106.22209167480469,
"step": 166
},
{
"epoch": 0.87,
"grad_norm": 66.29716039592118,
"learning_rate": 4.4310035842293903e-07,
"logits/generated": -2.113171100616455,
"logits/oppo_generated": -2.955277442932129,
"logits/oppo_real": -3.2049663066864014,
"logits/real": -2.1531317234039307,
"logps/generated": -731.0936279296875,
"logps/oppo_gen": -87.20503234863281,
"logps/oppo_real": -395.21441650390625,
"logps/real": -363.5802001953125,
"loss": -1.8696,
"loss/gen": 0.11870712786912918,
"loss/real": -2.3163421154022217,
"rewards/accuracies": 1.0,
"rewards/generated": -643.8885498046875,
"rewards/margins": 675.5227661132812,
"rewards/real": 31.634204864501953,
"step": 167
},
{
"epoch": 0.88,
"grad_norm": 66.8398673778887,
"learning_rate": 4.4265232974910396e-07,
"logits/generated": -1.6186769008636475,
"logits/oppo_generated": -2.8609347343444824,
"logits/oppo_real": -2.865668773651123,
"logits/real": -1.6237159967422485,
"logps/generated": -770.720703125,
"logps/oppo_gen": -97.59341430664062,
"logps/oppo_real": -205.549560546875,
"logps/real": -184.6165771484375,
"loss": -1.8003,
"loss/gen": 0.018834060057997704,
"loss/real": -2.2093300819396973,
"rewards/accuracies": 1.0,
"rewards/generated": -673.1272583007812,
"rewards/margins": 694.060302734375,
"rewards/real": 20.9329833984375,
"step": 168
},
{
"epoch": 0.88,
"grad_norm": 54.69193731997889,
"learning_rate": 4.422043010752688e-07,
"logits/generated": -2.053788900375366,
"logits/oppo_generated": -2.971303939819336,
"logits/oppo_real": -3.011564016342163,
"logits/real": -2.120933771133423,
"logps/generated": -695.5224609375,
"logps/oppo_gen": -89.73414611816406,
"logps/oppo_real": -350.30523681640625,
"logps/real": -328.1854248046875,
"loss": -1.9518,
"loss/gen": 0.09373271465301514,
"loss/real": -2.2211976051330566,
"rewards/accuracies": 1.0,
"rewards/generated": -605.788330078125,
"rewards/margins": 627.9080810546875,
"rewards/real": 22.119773864746094,
"step": 169
},
{
"epoch": 0.89,
"grad_norm": 52.319817289914155,
"learning_rate": 4.4175627240143367e-07,
"logits/generated": -1.4142050743103027,
"logits/oppo_generated": -2.840271472930908,
"logits/oppo_real": -2.7079410552978516,
"logits/real": -2.1080374717712402,
"logps/generated": -555.2352905273438,
"logps/oppo_gen": -53.288421630859375,
"logps/oppo_real": -155.62603759765625,
"logps/real": -192.53866577148438,
"loss": -1.7625,
"loss/gen": 0.1426987648010254,
"loss/real": -1.6308739185333252,
"rewards/accuracies": 1.0,
"rewards/generated": -501.94683837890625,
"rewards/margins": 465.03424072265625,
"rewards/real": -36.9126091003418,
"step": 170
},
{
"epoch": 0.89,
"grad_norm": 58.41200981497726,
"learning_rate": 4.4130824372759855e-07,
"logits/generated": -1.7520662546157837,
"logits/oppo_generated": -3.0052433013916016,
"logits/oppo_real": -2.8851370811462402,
"logits/real": -2.3384604454040527,
"logps/generated": -719.3140258789062,
"logps/oppo_gen": -76.29854583740234,
"logps/oppo_real": -393.45806884765625,
"logps/real": -362.9105224609375,
"loss": -1.877,
"loss/gen": 0.030190223827958107,
"loss/real": -2.3054752349853516,
"rewards/accuracies": 1.0,
"rewards/generated": -643.0155029296875,
"rewards/margins": 673.56298828125,
"rewards/real": 30.547513961791992,
"step": 171
},
{
"epoch": 0.9,
"grad_norm": 48.523557624428676,
"learning_rate": 4.4086021505376344e-07,
"logits/generated": -1.3822331428527832,
"logits/oppo_generated": -2.7603323459625244,
"logits/oppo_real": -2.456112861633301,
"logits/real": -2.125643253326416,
"logps/generated": -608.9871826171875,
"logps/oppo_gen": -69.90534210205078,
"logps/oppo_real": -236.9437255859375,
"logps/real": -226.87928771972656,
"loss": -1.9436,
"loss/gen": 0.6278254389762878,
"loss/real": -2.100644111633301,
"rewards/accuracies": 1.0,
"rewards/generated": -539.081787109375,
"rewards/margins": 549.146240234375,
"rewards/real": 10.06441879272461,
"step": 172
},
{
"epoch": 0.9,
"grad_norm": 64.36034041215716,
"learning_rate": 4.4041218637992826e-07,
"logits/generated": -1.5218685865402222,
"logits/oppo_generated": -2.696648597717285,
"logits/oppo_real": -2.8764772415161133,
"logits/real": -1.5772918462753296,
"logps/generated": -611.6922607421875,
"logps/oppo_gen": -87.64535522460938,
"logps/oppo_real": -305.02203369140625,
"logps/real": -342.9971618652344,
"loss": -1.7637,
"loss/gen": 0.49089139699935913,
"loss/real": -1.620248556137085,
"rewards/accuracies": 1.0,
"rewards/generated": -524.046875,
"rewards/margins": 486.0717468261719,
"rewards/real": -37.975135803222656,
"step": 173
},
{
"epoch": 0.91,
"grad_norm": 60.28151730111809,
"learning_rate": 4.399641577060932e-07,
"logits/generated": -1.8260202407836914,
"logits/oppo_generated": -2.817328453063965,
"logits/oppo_real": -2.961047649383545,
"logits/real": -1.7334787845611572,
"logps/generated": -653.2313842773438,
"logps/oppo_gen": -62.92127990722656,
"logps/oppo_real": -255.6164093017578,
"logps/real": -285.0434265136719,
"loss": -1.8254,
"loss/gen": 0.058723676949739456,
"loss/real": -1.7057299613952637,
"rewards/accuracies": 1.0,
"rewards/generated": -590.31005859375,
"rewards/margins": 560.883056640625,
"rewards/real": -29.427001953125,
"step": 174
},
{
"epoch": 0.91,
"grad_norm": 99.86272804768053,
"learning_rate": 4.3951612903225803e-07,
"logits/generated": -1.548148512840271,
"logits/oppo_generated": -2.848414897918701,
"logits/oppo_real": -2.944563388824463,
"logits/real": -2.128657102584839,
"logps/generated": -713.473876953125,
"logps/oppo_gen": -95.9730453491211,
"logps/oppo_real": -342.0549621582031,
"logps/real": -321.3438720703125,
"loss": -1.8176,
"loss/gen": 0.12640155851840973,
"loss/real": -2.20711088180542,
"rewards/accuracies": 1.0,
"rewards/generated": -617.5008544921875,
"rewards/margins": 638.2119140625,
"rewards/real": 20.71107292175293,
"step": 175
},
{
"epoch": 0.92,
"grad_norm": 47.58197849747712,
"learning_rate": 4.390681003584229e-07,
"logits/generated": -1.8227300643920898,
"logits/oppo_generated": -2.591665744781494,
"logits/oppo_real": -2.58309268951416,
"logits/real": -2.0603384971618652,
"logps/generated": -735.041259765625,
"logps/oppo_gen": -46.3786735534668,
"logps/oppo_real": -55.683685302734375,
"logps/real": -42.93335723876953,
"loss": -1.9832,
"loss/gen": 0.15213513374328613,
"loss/real": -2.1275031566619873,
"rewards/accuracies": 1.0,
"rewards/generated": -688.66259765625,
"rewards/margins": 701.4129028320312,
"rewards/real": 12.75033187866211,
"step": 176
},
{
"epoch": 0.92,
"grad_norm": 85.49574242414793,
"learning_rate": 4.386200716845878e-07,
"logits/generated": -2.351722240447998,
"logits/oppo_generated": -2.9608449935913086,
"logits/oppo_real": -3.0993542671203613,
"logits/real": -2.4843802452087402,
"logps/generated": -788.0579833984375,
"logps/oppo_gen": -151.79364013671875,
"logps/oppo_real": -470.64190673828125,
"logps/real": -452.0572814941406,
"loss": -1.8285,
"loss/gen": 0.014275267720222473,
"loss/real": -2.1858463287353516,
"rewards/accuracies": 1.0,
"rewards/generated": -636.2643432617188,
"rewards/margins": 654.8489990234375,
"rewards/real": 18.584644317626953,
"step": 177
},
{
"epoch": 0.93,
"grad_norm": 79.58229575021696,
"learning_rate": 4.3817204301075267e-07,
"logits/generated": -2.1911535263061523,
"logits/oppo_generated": -2.955319404602051,
"logits/oppo_real": -3.0618791580200195,
"logits/real": -2.3935108184814453,
"logps/generated": -570.6660766601562,
"logps/oppo_gen": -117.42491149902344,
"logps/oppo_real": -394.2685546875,
"logps/real": -364.75994873046875,
"loss": -1.9488,
"loss/gen": 0.6023984551429749,
"loss/real": -2.295085906982422,
"rewards/accuracies": 1.0,
"rewards/generated": -453.24114990234375,
"rewards/margins": 482.749755859375,
"rewards/real": 29.50858497619629,
"step": 178
},
{
"epoch": 0.93,
"grad_norm": 70.38357171932063,
"learning_rate": 4.377240143369175e-07,
"logits/generated": -1.1671700477600098,
"logits/oppo_generated": -2.867384433746338,
"logits/oppo_real": -2.831949234008789,
"logits/real": -1.4764811992645264,
"logps/generated": -583.820068359375,
"logps/oppo_gen": -63.369956970214844,
"logps/oppo_real": -158.0244598388672,
"logps/real": -278.0089416503906,
"loss": -1.7578,
"loss/gen": 0.7094503045082092,
"loss/real": -0.8001553416252136,
"rewards/accuracies": 0.875,
"rewards/generated": -520.4500732421875,
"rewards/margins": 400.4656066894531,
"rewards/real": -119.98446655273438,
"step": 179
},
{
"epoch": 0.94,
"grad_norm": 74.56631119765144,
"learning_rate": 4.3727598566308243e-07,
"logits/generated": -1.9085487127304077,
"logits/oppo_generated": -2.6366734504699707,
"logits/oppo_real": -2.7779293060302734,
"logits/real": -2.0540764331817627,
"logps/generated": -625.862060546875,
"logps/oppo_gen": -67.02287292480469,
"logps/oppo_real": -384.6851806640625,
"logps/real": -352.558349609375,
"loss": -1.7415,
"loss/gen": 0.05960576981306076,
"loss/real": -2.3212685585021973,
"rewards/accuracies": 1.0,
"rewards/generated": -558.8392333984375,
"rewards/margins": 590.966064453125,
"rewards/real": 32.12684631347656,
"step": 180
},
{
"epoch": 0.95,
"grad_norm": 65.722717426606,
"learning_rate": 4.368279569892473e-07,
"logits/generated": -1.732559323310852,
"logits/oppo_generated": -2.634063959121704,
"logits/oppo_real": -2.9160523414611816,
"logits/real": -1.951407551765442,
"logps/generated": -677.4447631835938,
"logps/oppo_gen": -60.14728546142578,
"logps/oppo_real": -206.13426208496094,
"logps/real": -179.4784393310547,
"loss": -1.912,
"loss/gen": 0.08394724130630493,
"loss/real": -2.2665581703186035,
"rewards/accuracies": 1.0,
"rewards/generated": -617.2974853515625,
"rewards/margins": 643.953369140625,
"rewards/real": 26.65583038330078,
"step": 181
},
{
"epoch": 0.95,
"grad_norm": 69.63255260876554,
"learning_rate": 4.3637992831541214e-07,
"logits/generated": -2.192533493041992,
"logits/oppo_generated": -2.894730567932129,
"logits/oppo_real": -3.0438637733459473,
"logits/real": -2.3837125301361084,
"logps/generated": -801.6678466796875,
"logps/oppo_gen": -99.43392181396484,
"logps/oppo_real": -454.5760192871094,
"logps/real": -435.48876953125,
"loss": -1.9334,
"loss/gen": 0.007741004228591919,
"loss/real": -2.1908724308013916,
"rewards/accuracies": 1.0,
"rewards/generated": -702.2339477539062,
"rewards/margins": 721.3212280273438,
"rewards/real": 19.087242126464844,
"step": 182
},
{
"epoch": 0.96,
"grad_norm": 92.43653861907097,
"learning_rate": 4.359318996415771e-07,
"logits/generated": -1.6997716426849365,
"logits/oppo_generated": -2.791623115539551,
"logits/oppo_real": -2.808715581893921,
"logits/real": -1.985607624053955,
"logps/generated": -561.043212890625,
"logps/oppo_gen": -85.10957336425781,
"logps/oppo_real": -352.60321044921875,
"logps/real": -316.6192626953125,
"loss": -1.8481,
"loss/gen": 0.3760773539543152,
"loss/real": -2.35983943939209,
"rewards/accuracies": 1.0,
"rewards/generated": -475.93359375,
"rewards/margins": 511.9176025390625,
"rewards/real": 35.98394775390625,
"step": 183
},
{
"epoch": 0.96,
"grad_norm": 55.39334883993858,
"learning_rate": 4.354838709677419e-07,
"logits/generated": -2.0199737548828125,
"logits/oppo_generated": -2.7498302459716797,
"logits/oppo_real": -3.0951387882232666,
"logits/real": -2.1367290019989014,
"logps/generated": -683.6644287109375,
"logps/oppo_gen": -88.00972747802734,
"logps/oppo_real": -444.2664794921875,
"logps/real": -413.0567626953125,
"loss": -1.9361,
"loss/gen": 0.04308386147022247,
"loss/real": -2.3120970726013184,
"rewards/accuracies": 1.0,
"rewards/generated": -595.65478515625,
"rewards/margins": 626.864501953125,
"rewards/real": 31.209705352783203,
"step": 184
},
{
"epoch": 0.97,
"grad_norm": 91.63384592359965,
"learning_rate": 4.350358422939068e-07,
"logits/generated": -1.7172353267669678,
"logits/oppo_generated": -2.704036235809326,
"logits/oppo_real": -2.9259088039398193,
"logits/real": -1.7819669246673584,
"logps/generated": -828.3485717773438,
"logps/oppo_gen": -92.61172485351562,
"logps/oppo_real": -306.844970703125,
"logps/real": -278.81842041015625,
"loss": -1.8423,
"loss/gen": 0.39729800820350647,
"loss/real": -2.2802653312683105,
"rewards/accuracies": 1.0,
"rewards/generated": -735.73681640625,
"rewards/margins": 763.7633056640625,
"rewards/real": 28.02651596069336,
"step": 185
},
{
"epoch": 0.97,
"grad_norm": 96.31854803366016,
"learning_rate": 4.3458781362007167e-07,
"logits/generated": -0.24818015098571777,
"logits/oppo_generated": -2.877349853515625,
"logits/oppo_real": -2.687556743621826,
"logits/real": -1.999771237373352,
"logps/generated": -567.3941650390625,
"logps/oppo_gen": -45.4771728515625,
"logps/oppo_real": -291.0182189941406,
"logps/real": -295.58465576171875,
"loss": -1.9309,
"loss/gen": 0.07412834465503693,
"loss/real": -1.9543354511260986,
"rewards/accuracies": 1.0,
"rewards/generated": -521.9170532226562,
"rewards/margins": 517.3505859375,
"rewards/real": -4.5664520263671875,
"step": 186
},
{
"epoch": 0.98,
"grad_norm": 55.42259963932809,
"learning_rate": 4.3413978494623655e-07,
"logits/generated": -1.5127736330032349,
"logits/oppo_generated": -2.788801908493042,
"logits/oppo_real": -2.9535598754882812,
"logits/real": -1.855088233947754,
"logps/generated": -925.1292114257812,
"logps/oppo_gen": -66.30078125,
"logps/oppo_real": -343.6934814453125,
"logps/real": -336.6583557128906,
"loss": -1.8923,
"loss/gen": 0.016879774630069733,
"loss/real": -2.0703513622283936,
"rewards/accuracies": 1.0,
"rewards/generated": -858.828369140625,
"rewards/margins": 865.863525390625,
"rewards/real": 7.035131454467773,
"step": 187
},
{
"epoch": 0.98,
"grad_norm": 104.95930666066492,
"learning_rate": 4.336917562724014e-07,
"logits/generated": -1.429623007774353,
"logits/oppo_generated": -2.987077474594116,
"logits/oppo_real": -2.9259276390075684,
"logits/real": -2.400783061981201,
"logps/generated": -575.1930541992188,
"logps/oppo_gen": -55.12033462524414,
"logps/oppo_real": -192.61981201171875,
"logps/real": -160.576904296875,
"loss": -1.8125,
"loss/gen": 0.07752130925655365,
"loss/real": -2.3204290866851807,
"rewards/accuracies": 1.0,
"rewards/generated": -520.07275390625,
"rewards/margins": 552.1156005859375,
"rewards/real": 32.04291534423828,
"step": 188
},
{
"epoch": 0.99,
"grad_norm": 97.62940130923675,
"learning_rate": 4.332437275985663e-07,
"logits/generated": -1.4225895404815674,
"logits/oppo_generated": -2.717535972595215,
"logits/oppo_real": -2.961338520050049,
"logits/real": -2.149232864379883,
"logps/generated": -739.74853515625,
"logps/oppo_gen": -85.34925079345703,
"logps/oppo_real": -546.5931396484375,
"logps/real": -515.603759765625,
"loss": -1.7931,
"loss/gen": 0.017313145101070404,
"loss/real": -2.309894323348999,
"rewards/accuracies": 1.0,
"rewards/generated": -654.3992919921875,
"rewards/margins": 685.388671875,
"rewards/real": 30.98943328857422,
"step": 189
},
{
"epoch": 0.99,
"grad_norm": 69.46462749238816,
"learning_rate": 4.3279569892473114e-07,
"logits/generated": -1.5553247928619385,
"logits/oppo_generated": -2.8351736068725586,
"logits/oppo_real": -2.9403247833251953,
"logits/real": -1.9800009727478027,
"logps/generated": -723.7879028320312,
"logps/oppo_gen": -70.80876922607422,
"logps/oppo_real": -184.90956115722656,
"logps/real": -161.41104125976562,
"loss": -1.9214,
"loss/gen": 0.08144047111272812,
"loss/real": -2.2349853515625,
"rewards/accuracies": 1.0,
"rewards/generated": -652.9791259765625,
"rewards/margins": 676.4776611328125,
"rewards/real": 23.498506546020508,
"step": 190
},
{
"epoch": 1.0,
"grad_norm": 53.04196866117551,
"learning_rate": 4.32347670250896e-07,
"logits/generated": -1.5968117713928223,
"logits/oppo_generated": -2.9442050457000732,
"logits/oppo_real": -2.914801597595215,
"logits/real": -2.385002374649048,
"logps/generated": -731.5435791015625,
"logps/oppo_gen": -89.47770690917969,
"logps/oppo_real": -427.4810791015625,
"logps/real": -419.70947265625,
"loss": -1.9711,
"loss/gen": 0.019157692790031433,
"loss/real": -2.0777158737182617,
"rewards/accuracies": 1.0,
"rewards/generated": -642.06591796875,
"rewards/margins": 649.8374633789062,
"rewards/real": 7.771598815917969,
"step": 191
}
],
"logging_steps": 1.0,
"max_steps": 1146,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}