|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9973890339425587, |
|
"eval_steps": 100.0, |
|
"global_step": 191, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -3.0979137420654297, |
|
"logits/oppo_generated": -3.0123190879821777, |
|
"logits/oppo_real": -3.0979137420654297, |
|
"logits/real": -3.0123190879821777, |
|
"logps/generated": -90.71572875976562, |
|
"logps/oppo_gen": -90.71572875976562, |
|
"logps/oppo_real": -483.66973876953125, |
|
"logps/real": -483.66973876953125, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.83146333694458, |
|
"logits/oppo_generated": -2.7920122146606445, |
|
"logits/oppo_real": -2.83146333694458, |
|
"logits/real": -2.7920122146606445, |
|
"logps/generated": -62.34805679321289, |
|
"logps/oppo_gen": -62.34805679321289, |
|
"logps/oppo_real": -294.31817626953125, |
|
"logps/real": -294.31817626953125, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.919513702392578, |
|
"logits/oppo_generated": -2.9197988510131836, |
|
"logits/oppo_real": -2.919513702392578, |
|
"logits/real": -2.9197988510131836, |
|
"logps/generated": -106.68229675292969, |
|
"logps/oppo_gen": -106.68229675292969, |
|
"logps/oppo_real": -366.2132873535156, |
|
"logps/real": -366.2132873535156, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -3.021902561187744, |
|
"logits/oppo_generated": -2.7693958282470703, |
|
"logits/oppo_real": -3.021902561187744, |
|
"logits/real": -2.7693958282470703, |
|
"logps/generated": -71.52165222167969, |
|
"logps/oppo_gen": -71.52165222167969, |
|
"logps/oppo_real": -280.28497314453125, |
|
"logps/real": -280.28497314453125, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.7911243438720703, |
|
"logits/oppo_generated": -2.798036575317383, |
|
"logits/oppo_real": -2.7911243438720703, |
|
"logits/real": -2.798036575317383, |
|
"logps/generated": -43.92365646362305, |
|
"logps/oppo_gen": -43.92365646362305, |
|
"logps/oppo_real": -143.5323944091797, |
|
"logps/real": -143.5323944091797, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -3.104882001876831, |
|
"logits/oppo_generated": -2.8853113651275635, |
|
"logits/oppo_real": -3.104882001876831, |
|
"logits/real": -2.8853113651275635, |
|
"logps/generated": -85.01286315917969, |
|
"logps/oppo_gen": -85.01286315917969, |
|
"logps/oppo_real": -311.1739196777344, |
|
"logps/real": -311.1739196777344, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -3.0711278915405273, |
|
"logits/oppo_generated": -3.0225138664245605, |
|
"logits/oppo_real": -3.0711278915405273, |
|
"logits/real": -3.0225138664245605, |
|
"logps/generated": -104.07185363769531, |
|
"logps/oppo_gen": -104.07185363769531, |
|
"logps/oppo_real": -375.48779296875, |
|
"logps/real": -375.48779296875, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 135.4093691943988, |
|
"learning_rate": 1.6666666666666667e-08, |
|
"logits/generated": -2.9351305961608887, |
|
"logits/oppo_generated": -2.9404444694519043, |
|
"logits/oppo_real": -2.9351305961608887, |
|
"logits/real": -2.9404444694519043, |
|
"logps/generated": -94.80602264404297, |
|
"logps/oppo_gen": -94.80602264404297, |
|
"logps/oppo_real": -294.525146484375, |
|
"logps/real": -294.525146484375, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 142.18188175713527, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"logits/generated": -3.0981688499450684, |
|
"logits/oppo_generated": -2.8864831924438477, |
|
"logits/oppo_real": -3.0981688499450684, |
|
"logits/real": -2.8864831924438477, |
|
"logps/generated": -65.3975830078125, |
|
"logps/oppo_gen": -65.3975830078125, |
|
"logps/oppo_real": -312.339111328125, |
|
"logps/real": -312.339111328125, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 139.81928207734796, |
|
"learning_rate": 5e-08, |
|
"logits/generated": -2.960458278656006, |
|
"logits/oppo_generated": -2.756359577178955, |
|
"logits/oppo_real": -2.960458278656006, |
|
"logits/real": -2.756359577178955, |
|
"logps/generated": -76.06861877441406, |
|
"logps/oppo_gen": -76.06861877441406, |
|
"logps/oppo_real": -265.8212890625, |
|
"logps/real": -265.8212890625, |
|
"loss": 5.3891, |
|
"loss/gen": 7.389056205749512, |
|
"loss/real": -2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 146.15293253130434, |
|
"learning_rate": 6.666666666666667e-08, |
|
"logits/generated": -2.5165090560913086, |
|
"logits/oppo_generated": -2.6119813919067383, |
|
"logits/oppo_real": -2.516786575317383, |
|
"logits/real": -2.611802101135254, |
|
"logps/generated": -118.57888793945312, |
|
"logps/oppo_gen": -118.53258514404297, |
|
"logps/oppo_real": -290.76776123046875, |
|
"logps/real": -290.7937927246094, |
|
"loss": 5.3858, |
|
"loss/gen": 7.385635852813721, |
|
"loss/real": -1.9997397661209106, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.04630112648010254, |
|
"rewards/margins": 0.020273208618164062, |
|
"rewards/real": -0.026027917861938477, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 135.79117124283155, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/generated": -3.1297383308410645, |
|
"logits/oppo_generated": -3.0934252738952637, |
|
"logits/oppo_real": -3.130321502685547, |
|
"logits/real": -3.0924453735351562, |
|
"logps/generated": -96.59043884277344, |
|
"logps/oppo_gen": -96.39014434814453, |
|
"logps/oppo_real": -432.87994384765625, |
|
"logps/real": -432.9808349609375, |
|
"loss": 5.3776, |
|
"loss/gen": 7.374272346496582, |
|
"loss/real": -1.9989911317825317, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.20029473304748535, |
|
"rewards/margins": 0.09940791130065918, |
|
"rewards/real": -0.10088682174682617, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 138.10590224307938, |
|
"learning_rate": 1e-07, |
|
"logits/generated": -2.457440137863159, |
|
"logits/oppo_generated": -2.3860814571380615, |
|
"logits/oppo_real": -2.459930419921875, |
|
"logits/real": -2.383344888687134, |
|
"logps/generated": -67.26084899902344, |
|
"logps/oppo_gen": -66.88719940185547, |
|
"logps/oppo_real": -307.066650390625, |
|
"logps/real": -307.28009033203125, |
|
"loss": 5.3544, |
|
"loss/gen": 7.361501216888428, |
|
"loss/real": -1.9978655576705933, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.37365150451660156, |
|
"rewards/margins": 0.16021156311035156, |
|
"rewards/real": -0.21343994140625, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 134.5610848160604, |
|
"learning_rate": 1.1666666666666667e-07, |
|
"logits/generated": -2.897860050201416, |
|
"logits/oppo_generated": -2.7236456871032715, |
|
"logits/oppo_real": -2.906421661376953, |
|
"logits/real": -2.7175283432006836, |
|
"logps/generated": -55.62891387939453, |
|
"logps/oppo_gen": -54.473785400390625, |
|
"logps/oppo_real": -214.07330322265625, |
|
"logps/real": -214.57391357421875, |
|
"loss": 5.3058, |
|
"loss/gen": 7.3042426109313965, |
|
"loss/real": -1.9949939250946045, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -1.155130386352539, |
|
"rewards/margins": 0.6545138359069824, |
|
"rewards/real": -0.5006165504455566, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 143.7638580276698, |
|
"learning_rate": 1.3333333333333334e-07, |
|
"logits/generated": -2.9454569816589355, |
|
"logits/oppo_generated": -2.880186080932617, |
|
"logits/oppo_real": -2.954317092895508, |
|
"logits/real": -2.869965076446533, |
|
"logps/generated": -69.53794860839844, |
|
"logps/oppo_gen": -67.36585998535156, |
|
"logps/oppo_real": -272.5278015136719, |
|
"logps/real": -273.41131591796875, |
|
"loss": 5.2534, |
|
"loss/gen": 7.230417251586914, |
|
"loss/real": -1.9911651611328125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.172096014022827, |
|
"rewards/margins": 1.2886085510253906, |
|
"rewards/real": -0.8834874629974365, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 136.58602486162337, |
|
"learning_rate": 1.5e-07, |
|
"logits/generated": -2.9922404289245605, |
|
"logits/oppo_generated": -2.839021682739258, |
|
"logits/oppo_real": -3.0036399364471436, |
|
"logits/real": -2.8292860984802246, |
|
"logps/generated": -84.50007629394531, |
|
"logps/oppo_gen": -81.83857727050781, |
|
"logps/oppo_real": -431.7451171875, |
|
"logps/real": -432.54998779296875, |
|
"loss": 5.221, |
|
"loss/gen": 7.19518518447876, |
|
"loss/real": -1.9919514656066895, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.661508798599243, |
|
"rewards/margins": 1.8566477298736572, |
|
"rewards/real": -0.8048610687255859, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 129.87295474809218, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/generated": -2.658919334411621, |
|
"logits/oppo_generated": -2.676953077316284, |
|
"logits/oppo_real": -2.681703805923462, |
|
"logits/real": -2.6546268463134766, |
|
"logps/generated": -79.03924560546875, |
|
"logps/oppo_gen": -73.95083618164062, |
|
"logps/oppo_real": -231.33941650390625, |
|
"logps/real": -233.62753295898438, |
|
"loss": 4.9962, |
|
"loss/gen": 7.023112773895264, |
|
"loss/real": -1.9771190881729126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.088409423828125, |
|
"rewards/margins": 2.800309896469116, |
|
"rewards/real": -2.2880992889404297, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 128.38777930550867, |
|
"learning_rate": 1.833333333333333e-07, |
|
"logits/generated": -3.0433835983276367, |
|
"logits/oppo_generated": -2.802915096282959, |
|
"logits/oppo_real": -3.083519458770752, |
|
"logits/real": -2.778608560562134, |
|
"logps/generated": -60.27662658691406, |
|
"logps/oppo_gen": -53.611244201660156, |
|
"logps/oppo_real": -254.54031372070312, |
|
"logps/real": -258.7552490234375, |
|
"loss": 4.9034, |
|
"loss/gen": 6.913381099700928, |
|
"loss/real": -1.957850694656372, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -6.66538143157959, |
|
"rewards/margins": 2.4504411220550537, |
|
"rewards/real": -4.214940547943115, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 134.49697225913675, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -3.050615072250366, |
|
"logits/oppo_generated": -2.9067916870117188, |
|
"logits/oppo_real": -3.0934958457946777, |
|
"logits/real": -2.865483283996582, |
|
"logps/generated": -97.78262329101562, |
|
"logps/oppo_gen": -87.72978210449219, |
|
"logps/oppo_real": -446.26251220703125, |
|
"logps/real": -449.0770263671875, |
|
"loss": 4.8312, |
|
"loss/gen": 6.683139324188232, |
|
"loss/real": -1.9718552827835083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.052835464477539, |
|
"rewards/margins": 7.238361358642578, |
|
"rewards/real": -2.814474105834961, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 132.29558967280428, |
|
"learning_rate": 2.1666666666666667e-07, |
|
"logits/generated": -2.8847241401672363, |
|
"logits/oppo_generated": -3.023085832595825, |
|
"logits/oppo_real": -2.9624710083007812, |
|
"logits/real": -2.9275739192962646, |
|
"logps/generated": -84.17431640625, |
|
"logps/oppo_gen": -66.7940902709961, |
|
"logps/oppo_real": -276.38616943359375, |
|
"logps/real": -282.4690856933594, |
|
"loss": 4.4138, |
|
"loss/gen": 6.213091850280762, |
|
"loss/real": -1.9391708374023438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.380220413208008, |
|
"rewards/margins": 11.297311782836914, |
|
"rewards/real": -6.082908630371094, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 122.03695169138321, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"logits/generated": -2.785062313079834, |
|
"logits/oppo_generated": -2.763364315032959, |
|
"logits/oppo_real": -2.8705592155456543, |
|
"logits/real": -2.6673243045806885, |
|
"logps/generated": -97.63499450683594, |
|
"logps/oppo_gen": -77.94976043701172, |
|
"logps/oppo_real": -317.0445861816406, |
|
"logps/real": -323.0356750488281, |
|
"loss": 4.265, |
|
"loss/gen": 6.084134578704834, |
|
"loss/real": -1.9400889873504639, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.68524169921875, |
|
"rewards/margins": 13.694145202636719, |
|
"rewards/real": -5.991097450256348, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 122.03695169138321, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"logits/generated": -2.726562261581421, |
|
"logits/oppo_generated": -2.855116844177246, |
|
"logits/oppo_real": -2.865086078643799, |
|
"logits/real": -2.7427496910095215, |
|
"logps/generated": -82.3883285522461, |
|
"logps/oppo_gen": -62.901329040527344, |
|
"logps/oppo_real": -202.70956420898438, |
|
"logps/real": -216.28871154785156, |
|
"loss": 4.0626, |
|
"loss/gen": 6.088003635406494, |
|
"loss/real": -1.8642082214355469, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -19.48699951171875, |
|
"rewards/margins": 5.9078264236450195, |
|
"rewards/real": -13.57917308807373, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 129.4518717878386, |
|
"learning_rate": 2.5e-07, |
|
"logits/generated": -2.3680832386016846, |
|
"logits/oppo_generated": -2.7028326988220215, |
|
"logits/oppo_real": -2.4785587787628174, |
|
"logits/real": -2.602602958679199, |
|
"logps/generated": -85.23381042480469, |
|
"logps/oppo_gen": -63.708274841308594, |
|
"logps/oppo_real": -408.9969482421875, |
|
"logps/real": -411.189697265625, |
|
"loss": 4.0392, |
|
"loss/gen": 5.969209671020508, |
|
"loss/real": -1.978072166442871, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.525535583496094, |
|
"rewards/margins": 19.332752227783203, |
|
"rewards/real": -2.192781925201416, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 126.05579509313525, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"logits/generated": -2.5577526092529297, |
|
"logits/oppo_generated": -2.848795175552368, |
|
"logits/oppo_real": -2.7064318656921387, |
|
"logits/real": -2.7243504524230957, |
|
"logps/generated": -106.70217895507812, |
|
"logps/oppo_gen": -80.21543884277344, |
|
"logps/oppo_real": -328.6651611328125, |
|
"logps/real": -338.0718078613281, |
|
"loss": 3.8879, |
|
"loss/gen": 5.678422451019287, |
|
"loss/real": -1.9059333801269531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.486736297607422, |
|
"rewards/margins": 17.080078125, |
|
"rewards/real": -9.406658172607422, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 126.74033031409142, |
|
"learning_rate": 2.833333333333333e-07, |
|
"logits/generated": -2.780156135559082, |
|
"logits/oppo_generated": -2.961332321166992, |
|
"logits/oppo_real": -2.937591075897217, |
|
"logits/real": -2.828620672225952, |
|
"logps/generated": -102.30010986328125, |
|
"logps/oppo_gen": -69.95628356933594, |
|
"logps/oppo_real": -426.2795104980469, |
|
"logps/real": -442.405029296875, |
|
"loss": 3.6222, |
|
"loss/gen": 5.359460830688477, |
|
"loss/real": -1.838744878768921, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.34381866455078, |
|
"rewards/margins": 16.218303680419922, |
|
"rewards/real": -16.12551498413086, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 118.32002749429193, |
|
"learning_rate": 3e-07, |
|
"logits/generated": -2.8048362731933594, |
|
"logits/oppo_generated": -2.8632454872131348, |
|
"logits/oppo_real": -2.9817347526550293, |
|
"logits/real": -2.6736109256744385, |
|
"logps/generated": -134.10989379882812, |
|
"logps/oppo_gen": -92.99905395507812, |
|
"logps/oppo_real": -293.31121826171875, |
|
"logps/real": -314.2544250488281, |
|
"loss": 3.3844, |
|
"loss/gen": 4.9427008628845215, |
|
"loss/real": -1.7905679941177368, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.11084747314453, |
|
"rewards/margins": 20.167648315429688, |
|
"rewards/real": -20.943199157714844, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 116.9737175007168, |
|
"learning_rate": 3.166666666666666e-07, |
|
"logits/generated": -2.8670060634613037, |
|
"logits/oppo_generated": -2.9514551162719727, |
|
"logits/oppo_real": -3.061511993408203, |
|
"logits/real": -2.7576708793640137, |
|
"logps/generated": -200.80691528320312, |
|
"logps/oppo_gen": -153.51296997070312, |
|
"logps/oppo_real": -477.2593994140625, |
|
"logps/real": -495.4168701171875, |
|
"loss": 3.1117, |
|
"loss/gen": 4.679584503173828, |
|
"loss/real": -1.818424940109253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -47.29395294189453, |
|
"rewards/margins": 29.136451721191406, |
|
"rewards/real": -18.157499313354492, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 114.94145928471589, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/generated": -2.597921848297119, |
|
"logits/oppo_generated": -2.8073906898498535, |
|
"logits/oppo_real": -2.8212432861328125, |
|
"logits/real": -2.581136703491211, |
|
"logps/generated": -125.5290756225586, |
|
"logps/oppo_gen": -73.3681411743164, |
|
"logps/oppo_real": -366.8509826660156, |
|
"logps/real": -396.5523986816406, |
|
"loss": 2.8982, |
|
"loss/gen": 4.413166522979736, |
|
"loss/real": -1.7029860019683838, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.16094207763672, |
|
"rewards/margins": 22.459529876708984, |
|
"rewards/real": -29.701412200927734, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 105.4175848115484, |
|
"learning_rate": 3.5e-07, |
|
"logits/generated": -2.5924081802368164, |
|
"logits/oppo_generated": -2.780890941619873, |
|
"logits/oppo_real": -2.8437681198120117, |
|
"logits/real": -2.549656629562378, |
|
"logps/generated": -120.14186096191406, |
|
"logps/oppo_gen": -64.063720703125, |
|
"logps/oppo_real": -288.26275634765625, |
|
"logps/real": -320.1248779296875, |
|
"loss": 2.627, |
|
"loss/gen": 4.243507385253906, |
|
"loss/real": -1.6813790798187256, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -56.078147888183594, |
|
"rewards/margins": 24.216047286987305, |
|
"rewards/real": -31.862098693847656, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 103.82670827663998, |
|
"learning_rate": 3.666666666666666e-07, |
|
"logits/generated": -2.577294111251831, |
|
"logits/oppo_generated": -2.7607855796813965, |
|
"logits/oppo_real": -2.8285064697265625, |
|
"logits/real": -2.4904675483703613, |
|
"logps/generated": -202.02459716796875, |
|
"logps/oppo_gen": -138.3541259765625, |
|
"logps/oppo_real": -562.3087158203125, |
|
"logps/real": -597.827392578125, |
|
"loss": 2.3475, |
|
"loss/gen": 3.986393928527832, |
|
"loss/real": -1.6448135375976562, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -63.67047882080078, |
|
"rewards/margins": 28.15182113647461, |
|
"rewards/real": -35.51865768432617, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 100.94574269750062, |
|
"learning_rate": 3.8333333333333335e-07, |
|
"logits/generated": -2.498058319091797, |
|
"logits/oppo_generated": -2.669119358062744, |
|
"logits/oppo_real": -2.8678927421569824, |
|
"logits/real": -2.371706008911133, |
|
"logps/generated": -145.69668579101562, |
|
"logps/oppo_gen": -70.41365051269531, |
|
"logps/oppo_real": -241.83944702148438, |
|
"logps/real": -286.0458984375, |
|
"loss": 2.0654, |
|
"loss/gen": 3.6289942264556885, |
|
"loss/real": -1.5579355955123901, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -75.28302001953125, |
|
"rewards/margins": 31.076583862304688, |
|
"rewards/real": -44.20643997192383, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 98.157543369384, |
|
"learning_rate": 4e-07, |
|
"logits/generated": -2.1765198707580566, |
|
"logits/oppo_generated": -2.742915630340576, |
|
"logits/oppo_real": -2.5176854133605957, |
|
"logits/real": -2.3865435123443604, |
|
"logps/generated": -154.5072021484375, |
|
"logps/oppo_gen": -88.06977844238281, |
|
"logps/oppo_real": -250.35305786132812, |
|
"logps/real": -301.65313720703125, |
|
"loss": 1.9822, |
|
"loss/gen": 3.8242578506469727, |
|
"loss/real": -1.486999273300171, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -66.43741607666016, |
|
"rewards/margins": 15.137344360351562, |
|
"rewards/real": -51.300071716308594, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 91.26540965776684, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/generated": -2.5197973251342773, |
|
"logits/oppo_generated": -2.7696216106414795, |
|
"logits/oppo_real": -2.8814268112182617, |
|
"logits/real": -2.416097640991211, |
|
"logps/generated": -142.49354553222656, |
|
"logps/oppo_gen": -71.01982116699219, |
|
"logps/oppo_real": -175.82728576660156, |
|
"logps/real": -231.18699645996094, |
|
"loss": 1.809, |
|
"loss/gen": 3.6547460556030273, |
|
"loss/real": -1.446402907371521, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -71.47373962402344, |
|
"rewards/margins": 16.114028930664062, |
|
"rewards/real": -55.359710693359375, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 77.6623991529202, |
|
"learning_rate": 4.3333333333333335e-07, |
|
"logits/generated": -2.51528263092041, |
|
"logits/oppo_generated": -2.669196128845215, |
|
"logits/oppo_real": -3.0089945793151855, |
|
"logits/real": -2.259756565093994, |
|
"logps/generated": -147.68319702148438, |
|
"logps/oppo_gen": -57.30543518066406, |
|
"logps/oppo_real": -326.7079772949219, |
|
"logps/real": -385.50555419921875, |
|
"loss": 1.5094, |
|
"loss/gen": 3.075594902038574, |
|
"loss/real": -1.4120240211486816, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -90.37777709960938, |
|
"rewards/margins": 31.580162048339844, |
|
"rewards/real": -58.797607421875, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 68.83081542698554, |
|
"learning_rate": 4.5e-07, |
|
"logits/generated": -2.14363431930542, |
|
"logits/oppo_generated": -2.504408359527588, |
|
"logits/oppo_real": -2.6590046882629395, |
|
"logits/real": -1.987848162651062, |
|
"logps/generated": -219.39852905273438, |
|
"logps/oppo_gen": -79.17024230957031, |
|
"logps/oppo_real": -203.21951293945312, |
|
"logps/real": -295.66925048828125, |
|
"loss": 1.3109, |
|
"loss/gen": 2.342175006866455, |
|
"loss/real": -1.075502634048462, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -140.228271484375, |
|
"rewards/margins": 47.77854919433594, |
|
"rewards/real": -92.4497299194336, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 67.77129364404593, |
|
"learning_rate": 4.6666666666666666e-07, |
|
"logits/generated": -2.2981181144714355, |
|
"logits/oppo_generated": -2.8459527492523193, |
|
"logits/oppo_real": -2.959359645843506, |
|
"logits/real": -2.237192153930664, |
|
"logps/generated": -190.5108642578125, |
|
"logps/oppo_gen": -73.61311340332031, |
|
"logps/oppo_real": -330.1354675292969, |
|
"logps/real": -428.3790588378906, |
|
"loss": 1.2112, |
|
"loss/gen": 2.471275806427002, |
|
"loss/real": -1.017564058303833, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -116.89774322509766, |
|
"rewards/margins": 18.654144287109375, |
|
"rewards/real": -98.24359893798828, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 55.4256192349542, |
|
"learning_rate": 4.833333333333333e-07, |
|
"logits/generated": -2.098021984100342, |
|
"logits/oppo_generated": -2.913597345352173, |
|
"logits/oppo_real": -2.7888224124908447, |
|
"logits/real": -2.2414162158966064, |
|
"logps/generated": -163.77145385742188, |
|
"logps/oppo_gen": -43.92766189575195, |
|
"logps/oppo_real": -201.2423858642578, |
|
"logps/real": -328.263916015625, |
|
"loss": 1.1323, |
|
"loss/gen": 2.4318315982818604, |
|
"loss/real": -0.729784369468689, |
|
"rewards/accuracies": 0.375, |
|
"rewards/generated": -119.84378814697266, |
|
"rewards/margins": -7.177766799926758, |
|
"rewards/real": -127.02156066894531, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 43.58372700636816, |
|
"learning_rate": 5e-07, |
|
"logits/generated": -2.149717330932617, |
|
"logits/oppo_generated": -2.8873682022094727, |
|
"logits/oppo_real": -2.953073024749756, |
|
"logits/real": -2.14795184135437, |
|
"logps/generated": -204.06512451171875, |
|
"logps/oppo_gen": -63.10968017578125, |
|
"logps/oppo_real": -290.7437744140625, |
|
"logps/real": -395.0226745605469, |
|
"loss": 0.9722, |
|
"loss/gen": 1.9304319620132446, |
|
"loss/real": -0.9572109580039978, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -140.95542907714844, |
|
"rewards/margins": 36.676517486572266, |
|
"rewards/real": -104.2789077758789, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 43.28064925124593, |
|
"learning_rate": 4.995519713261649e-07, |
|
"logits/generated": -2.0406126976013184, |
|
"logits/oppo_generated": -2.9155023097991943, |
|
"logits/oppo_real": -2.8444814682006836, |
|
"logits/real": -2.224266767501831, |
|
"logps/generated": -214.7823486328125, |
|
"logps/oppo_gen": -57.36619567871094, |
|
"logps/oppo_real": -319.1702880859375, |
|
"logps/real": -470.078369140625, |
|
"loss": 0.9536, |
|
"loss/gen": 1.6517560482025146, |
|
"loss/real": -0.49091899394989014, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -157.41615295410156, |
|
"rewards/margins": 6.508047103881836, |
|
"rewards/real": -150.90811157226562, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 39.787796189107254, |
|
"learning_rate": 4.991039426523297e-07, |
|
"logits/generated": -2.045642614364624, |
|
"logits/oppo_generated": -2.817904233932495, |
|
"logits/oppo_real": -2.799046277999878, |
|
"logits/real": -2.124079704284668, |
|
"logps/generated": -240.5778350830078, |
|
"logps/oppo_gen": -62.37173080444336, |
|
"logps/oppo_real": -271.6532287597656, |
|
"logps/real": -416.31292724609375, |
|
"loss": 0.8435, |
|
"loss/gen": 1.3889131546020508, |
|
"loss/real": -0.5534029603004456, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -178.2061004638672, |
|
"rewards/margins": 33.546390533447266, |
|
"rewards/real": -144.65969848632812, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 39.13177763030722, |
|
"learning_rate": 4.986559139784946e-07, |
|
"logits/generated": -2.117490291595459, |
|
"logits/oppo_generated": -2.833721160888672, |
|
"logits/oppo_real": -2.8886466026306152, |
|
"logits/real": -2.132415533065796, |
|
"logps/generated": -227.88870239257812, |
|
"logps/oppo_gen": -62.6229248046875, |
|
"logps/oppo_real": -254.74127197265625, |
|
"logps/real": -392.49224853515625, |
|
"loss": 0.8003, |
|
"loss/gen": 1.5646700859069824, |
|
"loss/real": -0.6224902868270874, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -165.26576232910156, |
|
"rewards/margins": 27.51479148864746, |
|
"rewards/real": -137.7509765625, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 42.057857807077056, |
|
"learning_rate": 4.982078853046595e-07, |
|
"logits/generated": -2.0003559589385986, |
|
"logits/oppo_generated": -2.829643726348877, |
|
"logits/oppo_real": -2.9243969917297363, |
|
"logits/real": -2.0654373168945312, |
|
"logps/generated": -399.18463134765625, |
|
"logps/oppo_gen": -93.32018280029297, |
|
"logps/oppo_real": -296.91900634765625, |
|
"logps/real": -458.50836181640625, |
|
"loss": 0.7365, |
|
"loss/gen": 0.6350959539413452, |
|
"loss/real": -0.3841061592102051, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -305.86444091796875, |
|
"rewards/margins": 144.27505493164062, |
|
"rewards/real": -161.5894012451172, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 45.58806028155152, |
|
"learning_rate": 4.977598566308244e-07, |
|
"logits/generated": -1.7739081382751465, |
|
"logits/oppo_generated": -2.5540552139282227, |
|
"logits/oppo_real": -2.583406448364258, |
|
"logits/real": -1.800355315208435, |
|
"logps/generated": -248.06814575195312, |
|
"logps/oppo_gen": -62.72918701171875, |
|
"logps/oppo_real": -156.4404296875, |
|
"logps/real": -300.55322265625, |
|
"loss": 0.6065, |
|
"loss/gen": 1.3522555828094482, |
|
"loss/real": -0.5588721036911011, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -185.33897399902344, |
|
"rewards/margins": 41.22618103027344, |
|
"rewards/real": -144.11279296875, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 43.27926721541303, |
|
"learning_rate": 4.973118279569893e-07, |
|
"logits/generated": -1.790377140045166, |
|
"logits/oppo_generated": -2.83392333984375, |
|
"logits/oppo_real": -2.716136932373047, |
|
"logits/real": -1.9894132614135742, |
|
"logps/generated": -220.31033325195312, |
|
"logps/oppo_gen": -47.22636795043945, |
|
"logps/oppo_real": -154.70913696289062, |
|
"logps/real": -310.2235412597656, |
|
"loss": 0.5664, |
|
"loss/gen": 1.3508105278015137, |
|
"loss/real": -0.44485586881637573, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -173.08395385742188, |
|
"rewards/margins": 17.569549560546875, |
|
"rewards/real": -155.514404296875, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 37.24892019460514, |
|
"learning_rate": 4.96863799283154e-07, |
|
"logits/generated": -2.4816246032714844, |
|
"logits/oppo_generated": -2.894857883453369, |
|
"logits/oppo_real": -3.2226767539978027, |
|
"logits/real": -2.1920199394226074, |
|
"logps/generated": -340.05987548828125, |
|
"logps/oppo_gen": -105.26202392578125, |
|
"logps/oppo_real": -493.02239990234375, |
|
"logps/real": -641.823974609375, |
|
"loss": 0.535, |
|
"loss/gen": 0.9430114030838013, |
|
"loss/real": -0.5119848847389221, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -234.7978515625, |
|
"rewards/margins": 85.996337890625, |
|
"rewards/real": -148.801513671875, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 34.75340901949854, |
|
"learning_rate": 4.96415770609319e-07, |
|
"logits/generated": -1.8138926029205322, |
|
"logits/oppo_generated": -2.9227819442749023, |
|
"logits/oppo_real": -2.7699246406555176, |
|
"logits/real": -2.158921957015991, |
|
"logps/generated": -272.76275634765625, |
|
"logps/oppo_gen": -65.69087982177734, |
|
"logps/oppo_real": -185.58651733398438, |
|
"logps/real": -307.59954833984375, |
|
"loss": 0.472, |
|
"loss/gen": 1.0596184730529785, |
|
"loss/real": -0.7798694372177124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -207.07186889648438, |
|
"rewards/margins": 85.05882263183594, |
|
"rewards/real": -122.01304626464844, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 30.414269200427842, |
|
"learning_rate": 4.959677419354838e-07, |
|
"logits/generated": -2.14742374420166, |
|
"logits/oppo_generated": -2.7580342292785645, |
|
"logits/oppo_real": -2.948944330215454, |
|
"logits/real": -2.00089168548584, |
|
"logps/generated": -289.2415771484375, |
|
"logps/oppo_gen": -83.18161010742188, |
|
"logps/oppo_real": -301.14892578125, |
|
"logps/real": -444.14013671875, |
|
"loss": 0.4491, |
|
"loss/gen": 1.023041009902954, |
|
"loss/real": -0.5700880289077759, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -206.05996704101562, |
|
"rewards/margins": 63.068756103515625, |
|
"rewards/real": -142.9912109375, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 34.17290379109778, |
|
"learning_rate": 4.955197132616487e-07, |
|
"logits/generated": -2.031747579574585, |
|
"logits/oppo_generated": -2.9268949031829834, |
|
"logits/oppo_real": -2.7888307571411133, |
|
"logits/real": -2.1852447986602783, |
|
"logps/generated": -290.5462341308594, |
|
"logps/oppo_gen": -73.60729217529297, |
|
"logps/oppo_real": -385.06817626953125, |
|
"logps/real": -524.4448852539062, |
|
"loss": 0.3841, |
|
"loss/gen": 0.9104207158088684, |
|
"loss/real": -0.6062330007553101, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -216.93894958496094, |
|
"rewards/margins": 77.56224060058594, |
|
"rewards/real": -139.376708984375, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 29.687072365341113, |
|
"learning_rate": 4.950716845878136e-07, |
|
"logits/generated": -2.092398166656494, |
|
"logits/oppo_generated": -2.939999580383301, |
|
"logits/oppo_real": -2.972858428955078, |
|
"logits/real": -2.12141752243042, |
|
"logps/generated": -248.61563110351562, |
|
"logps/oppo_gen": -63.58892059326172, |
|
"logps/oppo_real": -292.31512451171875, |
|
"logps/real": -430.34051513671875, |
|
"loss": 0.3154, |
|
"loss/gen": 1.2107611894607544, |
|
"loss/real": -0.6197463274002075, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -185.02670288085938, |
|
"rewards/margins": 47.0013427734375, |
|
"rewards/real": -138.02536010742188, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 34.51648439966765, |
|
"learning_rate": 4.946236559139784e-07, |
|
"logits/generated": -2.0396437644958496, |
|
"logits/oppo_generated": -2.9997801780700684, |
|
"logits/oppo_real": -3.0686826705932617, |
|
"logits/real": -2.159646511077881, |
|
"logps/generated": -287.21685791015625, |
|
"logps/oppo_gen": -74.3673095703125, |
|
"logps/oppo_real": -280.9033203125, |
|
"logps/real": -418.5960388183594, |
|
"loss": 0.2507, |
|
"loss/gen": 0.9513455629348755, |
|
"loss/real": -0.6230726838111877, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -212.8495330810547, |
|
"rewards/margins": 75.15680694580078, |
|
"rewards/real": -137.69273376464844, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 34.51648439966765, |
|
"learning_rate": 4.946236559139784e-07, |
|
"logits/generated": -1.9025869369506836, |
|
"logits/oppo_generated": -2.9109854698181152, |
|
"logits/oppo_real": -2.8473780155181885, |
|
"logits/real": -2.126164197921753, |
|
"logps/generated": -262.5989990234375, |
|
"logps/oppo_gen": -68.43426513671875, |
|
"logps/oppo_real": -255.580810546875, |
|
"logps/real": -343.41925048828125, |
|
"loss": 0.2655, |
|
"loss/gen": 1.116389513015747, |
|
"loss/real": -1.1216154098510742, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -194.16476440429688, |
|
"rewards/margins": 106.3262939453125, |
|
"rewards/real": -87.83845520019531, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 48.07575605554737, |
|
"learning_rate": 4.941756272401433e-07, |
|
"logits/generated": -1.9331986904144287, |
|
"logits/oppo_generated": -2.700697422027588, |
|
"logits/oppo_real": -2.7499947547912598, |
|
"logits/real": -1.9495654106140137, |
|
"logps/generated": -310.9825744628906, |
|
"logps/oppo_gen": -70.83297729492188, |
|
"logps/oppo_real": -279.63055419921875, |
|
"logps/real": -442.52166748046875, |
|
"loss": 0.2175, |
|
"loss/gen": 0.9699513912200928, |
|
"loss/real": -0.37108901143074036, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -240.14959716796875, |
|
"rewards/margins": 77.25849914550781, |
|
"rewards/real": -162.89111328125, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 42.34599711519009, |
|
"learning_rate": 4.937275985663082e-07, |
|
"logits/generated": -2.342167854309082, |
|
"logits/oppo_generated": -2.8682141304016113, |
|
"logits/oppo_real": -3.2790589332580566, |
|
"logits/real": -2.054896116256714, |
|
"logps/generated": -309.45147705078125, |
|
"logps/oppo_gen": -81.61607360839844, |
|
"logps/oppo_real": -221.72312927246094, |
|
"logps/real": -368.44476318359375, |
|
"loss": 0.1235, |
|
"loss/gen": 0.8315409421920776, |
|
"loss/real": -0.532783567905426, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -227.83538818359375, |
|
"rewards/margins": 81.11372375488281, |
|
"rewards/real": -146.72164916992188, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 25.769006325656385, |
|
"learning_rate": 4.932795698924731e-07, |
|
"logits/generated": -1.7544469833374023, |
|
"logits/oppo_generated": -2.7301034927368164, |
|
"logits/oppo_real": -2.709321975708008, |
|
"logits/real": -1.932969331741333, |
|
"logps/generated": -319.9211730957031, |
|
"logps/oppo_gen": -80.54927062988281, |
|
"logps/oppo_real": -310.97271728515625, |
|
"logps/real": -400.190673828125, |
|
"loss": 0.1326, |
|
"loss/gen": 0.9462176561355591, |
|
"loss/real": -1.1078202724456787, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -239.37188720703125, |
|
"rewards/margins": 150.15391540527344, |
|
"rewards/real": -89.21797943115234, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 25.790073064114104, |
|
"learning_rate": 4.92831541218638e-07, |
|
"logits/generated": -1.9824426174163818, |
|
"logits/oppo_generated": -2.766693592071533, |
|
"logits/oppo_real": -2.925718307495117, |
|
"logits/real": -1.925614595413208, |
|
"logps/generated": -310.7677001953125, |
|
"logps/oppo_gen": -71.80207824707031, |
|
"logps/oppo_real": -235.78529357910156, |
|
"logps/real": -371.66021728515625, |
|
"loss": 0.0871, |
|
"loss/gen": 0.9566553831100464, |
|
"loss/real": -0.641250491142273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -238.96560668945312, |
|
"rewards/margins": 103.09065246582031, |
|
"rewards/real": -135.8749542236328, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 28.998453033227694, |
|
"learning_rate": 4.923835125448029e-07, |
|
"logits/generated": -1.9766473770141602, |
|
"logits/oppo_generated": -2.7838592529296875, |
|
"logits/oppo_real": -2.928971767425537, |
|
"logits/real": -1.9118558168411255, |
|
"logps/generated": -332.1834411621094, |
|
"logps/oppo_gen": -79.61759185791016, |
|
"logps/oppo_real": -232.3800811767578, |
|
"logps/real": -358.1265869140625, |
|
"loss": 0.073, |
|
"loss/gen": 0.9157878160476685, |
|
"loss/real": -0.7425349950790405, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -252.56585693359375, |
|
"rewards/margins": 126.81934356689453, |
|
"rewards/real": -125.74649810791016, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 22.703764830970215, |
|
"learning_rate": 4.919354838709677e-07, |
|
"logits/generated": -1.9628534317016602, |
|
"logits/oppo_generated": -2.97432541847229, |
|
"logits/oppo_real": -2.8353166580200195, |
|
"logits/real": -2.0759224891662598, |
|
"logps/generated": -339.04296875, |
|
"logps/oppo_gen": -77.88506317138672, |
|
"logps/oppo_real": -276.94805908203125, |
|
"logps/real": -405.2261047363281, |
|
"loss": 0.0645, |
|
"loss/gen": 0.6549752950668335, |
|
"loss/real": -0.7172196507453918, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -261.15789794921875, |
|
"rewards/margins": 132.8798828125, |
|
"rewards/real": -128.2780303955078, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 25.084795160459006, |
|
"learning_rate": 4.914874551971326e-07, |
|
"logits/generated": -1.7674864530563354, |
|
"logits/oppo_generated": -2.975834846496582, |
|
"logits/oppo_real": -2.730165719985962, |
|
"logits/real": -2.051088333129883, |
|
"logps/generated": -442.43011474609375, |
|
"logps/oppo_gen": -74.65117645263672, |
|
"logps/oppo_real": -194.0476531982422, |
|
"logps/real": -333.55999755859375, |
|
"loss": -0.0438, |
|
"loss/gen": 0.723225474357605, |
|
"loss/real": -0.6048767566680908, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -367.7789306640625, |
|
"rewards/margins": 228.2666015625, |
|
"rewards/real": -139.5123291015625, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 26.174395867401362, |
|
"learning_rate": 4.910394265232975e-07, |
|
"logits/generated": -1.5783250331878662, |
|
"logits/oppo_generated": -2.842722177505493, |
|
"logits/oppo_real": -2.5714492797851562, |
|
"logits/real": -1.8060765266418457, |
|
"logps/generated": -329.1837158203125, |
|
"logps/oppo_gen": -54.848045349121094, |
|
"logps/oppo_real": -181.15826416015625, |
|
"logps/real": -288.05316162109375, |
|
"loss": -0.1401, |
|
"loss/gen": 0.6347978115081787, |
|
"loss/real": -0.9310512542724609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -274.3356628417969, |
|
"rewards/margins": 167.4407958984375, |
|
"rewards/real": -106.89486694335938, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 23.366499227158794, |
|
"learning_rate": 4.905913978494624e-07, |
|
"logits/generated": -1.848489761352539, |
|
"logits/oppo_generated": -2.70564603805542, |
|
"logits/oppo_real": -2.8469276428222656, |
|
"logits/real": -1.7071902751922607, |
|
"logps/generated": -299.8435974121094, |
|
"logps/oppo_gen": -58.444156646728516, |
|
"logps/oppo_real": -162.31228637695312, |
|
"logps/real": -293.0637512207031, |
|
"loss": -0.1455, |
|
"loss/gen": 0.7676070332527161, |
|
"loss/real": -0.6924855709075928, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -241.39942932128906, |
|
"rewards/margins": 110.64800262451172, |
|
"rewards/real": -130.75144958496094, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 23.965715716104874, |
|
"learning_rate": 4.901433691756272e-07, |
|
"logits/generated": -2.067742347717285, |
|
"logits/oppo_generated": -2.7352287769317627, |
|
"logits/oppo_real": -3.062769889831543, |
|
"logits/real": -1.7817035913467407, |
|
"logps/generated": -279.8091125488281, |
|
"logps/oppo_gen": -58.14486312866211, |
|
"logps/oppo_real": -235.44610595703125, |
|
"logps/real": -335.95269775390625, |
|
"loss": -0.1697, |
|
"loss/gen": 0.8564858436584473, |
|
"loss/real": -0.9949342012405396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -221.66424560546875, |
|
"rewards/margins": 121.15766143798828, |
|
"rewards/real": -100.506591796875, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 22.71430331741021, |
|
"learning_rate": 4.896953405017921e-07, |
|
"logits/generated": -1.850874900817871, |
|
"logits/oppo_generated": -2.81040096282959, |
|
"logits/oppo_real": -2.7932534217834473, |
|
"logits/real": -1.9105334281921387, |
|
"logps/generated": -315.2555236816406, |
|
"logps/oppo_gen": -83.87113952636719, |
|
"logps/oppo_real": -450.6523132324219, |
|
"logps/real": -546.0828857421875, |
|
"loss": -0.1951, |
|
"loss/gen": 0.9174035787582397, |
|
"loss/real": -1.0456944704055786, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -231.38438415527344, |
|
"rewards/margins": 135.95382690429688, |
|
"rewards/real": -95.43055725097656, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 24.960812793115707, |
|
"learning_rate": 4.89247311827957e-07, |
|
"logits/generated": -1.569549560546875, |
|
"logits/oppo_generated": -2.6111321449279785, |
|
"logits/oppo_real": -2.645878314971924, |
|
"logits/real": -1.4946357011795044, |
|
"logps/generated": -293.79986572265625, |
|
"logps/oppo_gen": -46.035884857177734, |
|
"logps/oppo_real": -112.12235260009766, |
|
"logps/real": -228.71566772460938, |
|
"loss": -0.2873, |
|
"loss/gen": 0.8553179502487183, |
|
"loss/real": -0.8340668082237244, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -247.76397705078125, |
|
"rewards/margins": 131.170654296875, |
|
"rewards/real": -116.59332275390625, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 28.01833397213722, |
|
"learning_rate": 4.887992831541218e-07, |
|
"logits/generated": -1.4229357242584229, |
|
"logits/oppo_generated": -2.796260356903076, |
|
"logits/oppo_real": -2.7083005905151367, |
|
"logits/real": -1.875757098197937, |
|
"logps/generated": -314.1737365722656, |
|
"logps/oppo_gen": -87.55534362792969, |
|
"logps/oppo_real": -460.838623046875, |
|
"logps/real": -533.9716796875, |
|
"loss": -0.23, |
|
"loss/gen": 0.8128476142883301, |
|
"loss/real": -1.268669843673706, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -226.61837768554688, |
|
"rewards/margins": 153.48536682128906, |
|
"rewards/real": -73.13301849365234, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 25.329698536099624, |
|
"learning_rate": 4.883512544802867e-07, |
|
"logits/generated": -1.8850572109222412, |
|
"logits/oppo_generated": -2.842528820037842, |
|
"logits/oppo_real": -3.0462865829467773, |
|
"logits/real": -1.8970359563827515, |
|
"logps/generated": -293.83636474609375, |
|
"logps/oppo_gen": -73.348388671875, |
|
"logps/oppo_real": -462.6760559082031, |
|
"logps/real": -521.508056640625, |
|
"loss": -0.2203, |
|
"loss/gen": 0.8699493408203125, |
|
"loss/real": -1.411679983139038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -220.48794555664062, |
|
"rewards/margins": 161.65594482421875, |
|
"rewards/real": -58.832008361816406, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 23.556966359497565, |
|
"learning_rate": 4.879032258064516e-07, |
|
"logits/generated": -1.7639917135238647, |
|
"logits/oppo_generated": -2.72807240486145, |
|
"logits/oppo_real": -3.0851736068725586, |
|
"logits/real": -1.7024996280670166, |
|
"logps/generated": -304.4716796875, |
|
"logps/oppo_gen": -62.214202880859375, |
|
"logps/oppo_real": -267.1644592285156, |
|
"logps/real": -361.82183837890625, |
|
"loss": -0.3428, |
|
"loss/gen": 0.7082281112670898, |
|
"loss/real": -1.0534261465072632, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -242.25747680664062, |
|
"rewards/margins": 147.60009765625, |
|
"rewards/real": -94.65738677978516, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 26.432042922680157, |
|
"learning_rate": 4.874551971326164e-07, |
|
"logits/generated": -1.849442720413208, |
|
"logits/oppo_generated": -2.9244961738586426, |
|
"logits/oppo_real": -2.8555960655212402, |
|
"logits/real": -1.9950306415557861, |
|
"logps/generated": -432.98052978515625, |
|
"logps/oppo_gen": -76.25796508789062, |
|
"logps/oppo_real": -273.7462158203125, |
|
"logps/real": -344.53662109375, |
|
"loss": -0.3943, |
|
"loss/gen": 0.6396173238754272, |
|
"loss/real": -1.2920961380004883, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -356.7225341796875, |
|
"rewards/margins": 285.93218994140625, |
|
"rewards/real": -70.79039001464844, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 25.96543112065059, |
|
"learning_rate": 4.870071684587813e-07, |
|
"logits/generated": -1.903306484222412, |
|
"logits/oppo_generated": -2.7076897621154785, |
|
"logits/oppo_real": -2.8026769161224365, |
|
"logits/real": -1.8154616355895996, |
|
"logps/generated": -306.98822021484375, |
|
"logps/oppo_gen": -92.40176391601562, |
|
"logps/oppo_real": -466.05743408203125, |
|
"logps/real": -509.5103759765625, |
|
"loss": -0.4503, |
|
"loss/gen": 1.017028570175171, |
|
"loss/real": -1.5654706954956055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -214.58645629882812, |
|
"rewards/margins": 171.13351440429688, |
|
"rewards/real": -43.45293426513672, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 24.761144113560263, |
|
"learning_rate": 4.865591397849462e-07, |
|
"logits/generated": -1.9471970796585083, |
|
"logits/oppo_generated": -2.91485595703125, |
|
"logits/oppo_real": -3.02333402633667, |
|
"logits/real": -1.9744383096694946, |
|
"logps/generated": -338.57110595703125, |
|
"logps/oppo_gen": -71.20426940917969, |
|
"logps/oppo_real": -316.012451171875, |
|
"logps/real": -401.06097412109375, |
|
"loss": -0.4181, |
|
"loss/gen": 0.5722190737724304, |
|
"loss/real": -1.1495147943496704, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -267.3668212890625, |
|
"rewards/margins": 182.3182830810547, |
|
"rewards/real": -85.04852294921875, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 25.560786779768364, |
|
"learning_rate": 4.861111111111111e-07, |
|
"logits/generated": -1.8378194570541382, |
|
"logits/oppo_generated": -2.835737466812134, |
|
"logits/oppo_real": -2.825862407684326, |
|
"logits/real": -1.9024310111999512, |
|
"logps/generated": -395.5787658691406, |
|
"logps/oppo_gen": -66.29288482666016, |
|
"logps/oppo_real": -411.4686279296875, |
|
"logps/real": -450.5022277832031, |
|
"loss": -0.491, |
|
"loss/gen": 0.5810615420341492, |
|
"loss/real": -1.6096638441085815, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -329.285888671875, |
|
"rewards/margins": 290.2522888183594, |
|
"rewards/real": -39.03361511230469, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 27.276715822734577, |
|
"learning_rate": 4.85663082437276e-07, |
|
"logits/generated": -1.2692300081253052, |
|
"logits/oppo_generated": -2.519876480102539, |
|
"logits/oppo_real": -2.3758904933929443, |
|
"logits/real": -1.504533052444458, |
|
"logps/generated": -309.310791015625, |
|
"logps/oppo_gen": -53.77077865600586, |
|
"logps/oppo_real": -232.32125854492188, |
|
"logps/real": -335.275390625, |
|
"loss": -0.4771, |
|
"loss/gen": 0.6719827651977539, |
|
"loss/real": -0.9704589247703552, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -255.53997802734375, |
|
"rewards/margins": 152.58587646484375, |
|
"rewards/real": -102.95411682128906, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 31.6782193710368, |
|
"learning_rate": 4.852150537634409e-07, |
|
"logits/generated": -1.967892050743103, |
|
"logits/oppo_generated": -2.847916841506958, |
|
"logits/oppo_real": -3.119495391845703, |
|
"logits/real": -1.8703951835632324, |
|
"logps/generated": -302.1221618652344, |
|
"logps/oppo_gen": -61.632965087890625, |
|
"logps/oppo_real": -283.0968322753906, |
|
"logps/real": -316.48565673828125, |
|
"loss": -0.5487, |
|
"loss/gen": 0.792396605014801, |
|
"loss/real": -1.6661118268966675, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -240.48919677734375, |
|
"rewards/margins": 207.1003875732422, |
|
"rewards/real": -33.388816833496094, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 28.101801413357496, |
|
"learning_rate": 4.847670250896057e-07, |
|
"logits/generated": -1.7287254333496094, |
|
"logits/oppo_generated": -2.817739963531494, |
|
"logits/oppo_real": -2.8102121353149414, |
|
"logits/real": -1.9246106147766113, |
|
"logps/generated": -348.30560302734375, |
|
"logps/oppo_gen": -84.71308135986328, |
|
"logps/oppo_real": -441.73095703125, |
|
"logps/real": -496.3192138671875, |
|
"loss": -0.5813, |
|
"loss/gen": 0.767849326133728, |
|
"loss/real": -1.454117774963379, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -263.5924987792969, |
|
"rewards/margins": 209.0042724609375, |
|
"rewards/real": -54.588233947753906, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 38.45779443916408, |
|
"learning_rate": 4.843189964157705e-07, |
|
"logits/generated": -1.91994047164917, |
|
"logits/oppo_generated": -2.863375186920166, |
|
"logits/oppo_real": -2.9448790550231934, |
|
"logits/real": -1.9825626611709595, |
|
"logps/generated": -329.95269775390625, |
|
"logps/oppo_gen": -68.82878112792969, |
|
"logps/oppo_real": -370.04193115234375, |
|
"logps/real": -432.669189453125, |
|
"loss": -0.5133, |
|
"loss/gen": 0.658030092716217, |
|
"loss/real": -1.373727798461914, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -261.1239013671875, |
|
"rewards/margins": 198.49668884277344, |
|
"rewards/real": -62.62722396850586, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 30.626852423248927, |
|
"learning_rate": 4.838709677419355e-07, |
|
"logits/generated": -1.6279140710830688, |
|
"logits/oppo_generated": -2.6728456020355225, |
|
"logits/oppo_real": -2.717794418334961, |
|
"logits/real": -1.7041985988616943, |
|
"logps/generated": -413.343017578125, |
|
"logps/oppo_gen": -96.53443908691406, |
|
"logps/oppo_real": -349.5957336425781, |
|
"logps/real": -410.91473388671875, |
|
"loss": -0.6334, |
|
"loss/gen": 0.40114909410476685, |
|
"loss/real": -1.3868098258972168, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -316.80859375, |
|
"rewards/margins": 255.48956298828125, |
|
"rewards/real": -61.31901550292969, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 28.100519945214646, |
|
"learning_rate": 4.834229390681004e-07, |
|
"logits/generated": -1.8667380809783936, |
|
"logits/oppo_generated": -2.7693190574645996, |
|
"logits/oppo_real": -2.77004337310791, |
|
"logits/real": -1.9453085660934448, |
|
"logps/generated": -386.6142578125, |
|
"logps/oppo_gen": -76.46708679199219, |
|
"logps/oppo_real": -404.62432861328125, |
|
"logps/real": -449.008056640625, |
|
"loss": -0.7145, |
|
"loss/gen": 0.4149353802204132, |
|
"loss/real": -1.5561623573303223, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -310.1471862792969, |
|
"rewards/margins": 265.763427734375, |
|
"rewards/real": -44.383766174316406, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 28.100519945214646, |
|
"learning_rate": 4.834229390681004e-07, |
|
"logits/generated": -1.9856168031692505, |
|
"logits/oppo_generated": -2.82077693939209, |
|
"logits/oppo_real": -2.9570560455322266, |
|
"logits/real": -1.8599485158920288, |
|
"logps/generated": -351.98699951171875, |
|
"logps/oppo_gen": -75.9544906616211, |
|
"logps/oppo_real": -272.2792663574219, |
|
"logps/real": -342.5171813964844, |
|
"loss": -0.7074, |
|
"loss/gen": 0.5127630233764648, |
|
"loss/real": -1.2976210117340088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -276.03253173828125, |
|
"rewards/margins": 205.79464721679688, |
|
"rewards/real": -70.23788452148438, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 28.89926119061318, |
|
"learning_rate": 4.829749103942652e-07, |
|
"logits/generated": -1.4996392726898193, |
|
"logits/oppo_generated": -2.77388858795166, |
|
"logits/oppo_real": -2.6705479621887207, |
|
"logits/real": -1.6910290718078613, |
|
"logps/generated": -306.9933166503906, |
|
"logps/oppo_gen": -44.69869613647461, |
|
"logps/oppo_real": -146.0938720703125, |
|
"logps/real": -247.02572631835938, |
|
"loss": -0.7324, |
|
"loss/gen": 0.6118674278259277, |
|
"loss/real": -0.990681529045105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -262.29461669921875, |
|
"rewards/margins": 161.36277770996094, |
|
"rewards/real": -100.93185424804688, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 31.06954538435709, |
|
"learning_rate": 4.825268817204301e-07, |
|
"logits/generated": -1.8742992877960205, |
|
"logits/oppo_generated": -2.823974847793579, |
|
"logits/oppo_real": -2.8963050842285156, |
|
"logits/real": -1.9178167581558228, |
|
"logps/generated": -351.6239929199219, |
|
"logps/oppo_gen": -77.19644165039062, |
|
"logps/oppo_real": -326.88067626953125, |
|
"logps/real": -378.48602294921875, |
|
"loss": -0.698, |
|
"loss/gen": 0.5047196745872498, |
|
"loss/real": -1.4839468002319336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -274.42755126953125, |
|
"rewards/margins": 222.82223510742188, |
|
"rewards/real": -51.60532760620117, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 43.30920802785477, |
|
"learning_rate": 4.820788530465949e-07, |
|
"logits/generated": -1.7955554723739624, |
|
"logits/oppo_generated": -2.8842811584472656, |
|
"logits/oppo_real": -2.962029457092285, |
|
"logits/real": -1.9502441883087158, |
|
"logps/generated": -325.9687194824219, |
|
"logps/oppo_gen": -54.408782958984375, |
|
"logps/oppo_real": -296.562255859375, |
|
"logps/real": -371.3411865234375, |
|
"loss": -0.6694, |
|
"loss/gen": 0.6074373126029968, |
|
"loss/real": -1.2522108554840088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -271.5599365234375, |
|
"rewards/margins": 196.781005859375, |
|
"rewards/real": -74.77892303466797, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 67.32772068514016, |
|
"learning_rate": 4.816308243727598e-07, |
|
"logits/generated": -1.738360047340393, |
|
"logits/oppo_generated": -2.689702272415161, |
|
"logits/oppo_real": -2.605893611907959, |
|
"logits/real": -1.9019668102264404, |
|
"logps/generated": -328.74029541015625, |
|
"logps/oppo_gen": -72.64117431640625, |
|
"logps/oppo_real": -543.97119140625, |
|
"logps/real": -576.9856567382812, |
|
"loss": -0.7155, |
|
"loss/gen": 0.8135882616043091, |
|
"loss/real": -1.6698557138442993, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -256.09912109375, |
|
"rewards/margins": 223.08465576171875, |
|
"rewards/real": -33.014434814453125, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 45.615423331403214, |
|
"learning_rate": 4.811827956989247e-07, |
|
"logits/generated": -1.8823816776275635, |
|
"logits/oppo_generated": -2.6806015968322754, |
|
"logits/oppo_real": -2.851822853088379, |
|
"logits/real": -1.7596267461776733, |
|
"logps/generated": -390.17926025390625, |
|
"logps/oppo_gen": -60.20751953125, |
|
"logps/oppo_real": -257.2502136230469, |
|
"logps/real": -304.0963439941406, |
|
"loss": -0.8542, |
|
"loss/gen": 0.6062071919441223, |
|
"loss/real": -1.531538486480713, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -329.97174072265625, |
|
"rewards/margins": 283.1255798339844, |
|
"rewards/real": -46.84613800048828, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 82.74539439128168, |
|
"learning_rate": 4.807347670250896e-07, |
|
"logits/generated": -2.0427887439727783, |
|
"logits/oppo_generated": -2.855865001678467, |
|
"logits/oppo_real": -3.10068416595459, |
|
"logits/real": -1.9768484830856323, |
|
"logps/generated": -352.57281494140625, |
|
"logps/oppo_gen": -83.05951690673828, |
|
"logps/oppo_real": -407.9609375, |
|
"logps/real": -482.402587890625, |
|
"loss": -0.7341, |
|
"loss/gen": 0.5735456347465515, |
|
"loss/real": -1.2555840015411377, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -269.5133056640625, |
|
"rewards/margins": 195.07171630859375, |
|
"rewards/real": -74.44159698486328, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 59.27270587807996, |
|
"learning_rate": 4.802867383512544e-07, |
|
"logits/generated": -2.0733423233032227, |
|
"logits/oppo_generated": -2.820967197418213, |
|
"logits/oppo_real": -2.9550280570983887, |
|
"logits/real": -1.9983410835266113, |
|
"logps/generated": -397.8853454589844, |
|
"logps/oppo_gen": -113.86212921142578, |
|
"logps/oppo_real": -391.526123046875, |
|
"logps/real": -436.72381591796875, |
|
"loss": -0.9052, |
|
"loss/gen": 0.5662827491760254, |
|
"loss/real": -1.548022985458374, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -284.0232238769531, |
|
"rewards/margins": 238.82553100585938, |
|
"rewards/real": -45.19770050048828, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 48.762087844323275, |
|
"learning_rate": 4.798387096774193e-07, |
|
"logits/generated": -1.8719103336334229, |
|
"logits/oppo_generated": -2.8029284477233887, |
|
"logits/oppo_real": -2.9302010536193848, |
|
"logits/real": -1.9117934703826904, |
|
"logps/generated": -291.1409912109375, |
|
"logps/oppo_gen": -59.32632827758789, |
|
"logps/oppo_real": -256.26556396484375, |
|
"logps/real": -281.3046875, |
|
"loss": -0.7887, |
|
"loss/gen": 0.9425208568572998, |
|
"loss/real": -1.7496089935302734, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -231.814697265625, |
|
"rewards/margins": 206.77557373046875, |
|
"rewards/real": -25.03911590576172, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 54.32774527117797, |
|
"learning_rate": 4.793906810035842e-07, |
|
"logits/generated": -1.8661472797393799, |
|
"logits/oppo_generated": -2.915862560272217, |
|
"logits/oppo_real": -2.777987480163574, |
|
"logits/real": -2.0660204887390137, |
|
"logps/generated": -330.3395080566406, |
|
"logps/oppo_gen": -69.80546569824219, |
|
"logps/oppo_real": -230.58383178710938, |
|
"logps/real": -266.6177673339844, |
|
"loss": -0.93, |
|
"loss/gen": 0.6018867492675781, |
|
"loss/real": -1.6396608352661133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -260.5340576171875, |
|
"rewards/margins": 224.50010681152344, |
|
"rewards/real": -36.03392791748047, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 56.34434795855748, |
|
"learning_rate": 4.789426523297491e-07, |
|
"logits/generated": -1.8885971307754517, |
|
"logits/oppo_generated": -3.002845048904419, |
|
"logits/oppo_real": -2.9699549674987793, |
|
"logits/real": -2.183290719985962, |
|
"logps/generated": -343.339599609375, |
|
"logps/oppo_gen": -82.54539489746094, |
|
"logps/oppo_real": -261.07891845703125, |
|
"logps/real": -305.88140869140625, |
|
"loss": -0.9169, |
|
"loss/gen": 0.6482059955596924, |
|
"loss/real": -1.5519750118255615, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -260.794189453125, |
|
"rewards/margins": 215.99168395996094, |
|
"rewards/real": -44.80250930786133, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 51.83929494119302, |
|
"learning_rate": 4.78494623655914e-07, |
|
"logits/generated": -1.9645639657974243, |
|
"logits/oppo_generated": -2.8191170692443848, |
|
"logits/oppo_real": -2.8346924781799316, |
|
"logits/real": -2.02170991897583, |
|
"logps/generated": -359.3294982910156, |
|
"logps/oppo_gen": -77.02418518066406, |
|
"logps/oppo_real": -365.96343994140625, |
|
"logps/real": -455.93804931640625, |
|
"loss": -0.7995, |
|
"loss/gen": 0.5177488327026367, |
|
"loss/real": -1.1002535820007324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -282.3053283691406, |
|
"rewards/margins": 192.3306884765625, |
|
"rewards/real": -89.97463989257812, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 68.54197932426156, |
|
"learning_rate": 4.780465949820789e-07, |
|
"logits/generated": -2.152953624725342, |
|
"logits/oppo_generated": -2.9769649505615234, |
|
"logits/oppo_real": -3.0434319972991943, |
|
"logits/real": -2.1738698482513428, |
|
"logps/generated": -345.0735778808594, |
|
"logps/oppo_gen": -78.12904357910156, |
|
"logps/oppo_real": -379.5708312988281, |
|
"logps/real": -461.0662536621094, |
|
"loss": -0.92, |
|
"loss/gen": 0.7530688047409058, |
|
"loss/real": -1.1850459575653076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -266.94451904296875, |
|
"rewards/margins": 185.4491424560547, |
|
"rewards/real": -81.49540710449219, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 50.14855129777554, |
|
"learning_rate": 4.775985663082437e-07, |
|
"logits/generated": -1.7497427463531494, |
|
"logits/oppo_generated": -2.8537919521331787, |
|
"logits/oppo_real": -2.717353105545044, |
|
"logits/real": -2.008902072906494, |
|
"logps/generated": -428.0735778808594, |
|
"logps/oppo_gen": -119.7020492553711, |
|
"logps/oppo_real": -358.0323791503906, |
|
"logps/real": -440.1455383300781, |
|
"loss": -1.0468, |
|
"loss/gen": 0.5207056999206543, |
|
"loss/real": -1.178868293762207, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -308.3714904785156, |
|
"rewards/margins": 226.25833129882812, |
|
"rewards/real": -82.11316680908203, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 83.74393343233139, |
|
"learning_rate": 4.771505376344086e-07, |
|
"logits/generated": -1.6041526794433594, |
|
"logits/oppo_generated": -2.863269805908203, |
|
"logits/oppo_real": -2.6908156871795654, |
|
"logits/real": -1.9434775114059448, |
|
"logps/generated": -464.01544189453125, |
|
"logps/oppo_gen": -73.95469665527344, |
|
"logps/oppo_real": -308.79437255859375, |
|
"logps/real": -393.7951354980469, |
|
"loss": -0.8861, |
|
"loss/gen": 0.208269402384758, |
|
"loss/real": -1.1499923467636108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -390.06072998046875, |
|
"rewards/margins": 305.0599365234375, |
|
"rewards/real": -85.00077819824219, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 51.583056981707905, |
|
"learning_rate": 4.7670250896057344e-07, |
|
"logits/generated": -1.8397870063781738, |
|
"logits/oppo_generated": -2.8908724784851074, |
|
"logits/oppo_real": -2.8895483016967773, |
|
"logits/real": -1.9887313842773438, |
|
"logps/generated": -355.3204650878906, |
|
"logps/oppo_gen": -76.94686126708984, |
|
"logps/oppo_real": -271.02813720703125, |
|
"logps/real": -311.24884033203125, |
|
"loss": -1.0606, |
|
"loss/gen": 0.5185320377349854, |
|
"loss/real": -1.5977928638458252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -278.37359619140625, |
|
"rewards/margins": 238.15289306640625, |
|
"rewards/real": -40.22071075439453, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 84.3485346225336, |
|
"learning_rate": 4.762544802867383e-07, |
|
"logits/generated": -2.00917387008667, |
|
"logits/oppo_generated": -2.873152732849121, |
|
"logits/oppo_real": -2.9451375007629395, |
|
"logits/real": -2.055788993835449, |
|
"logps/generated": -423.2607421875, |
|
"logps/oppo_gen": -63.39752960205078, |
|
"logps/oppo_real": -189.16378784179688, |
|
"logps/real": -238.4603271484375, |
|
"loss": -0.9892, |
|
"loss/gen": 0.42659705877304077, |
|
"loss/real": -1.5070346593856812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -359.86322021484375, |
|
"rewards/margins": 310.566650390625, |
|
"rewards/real": -49.296546936035156, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 56.707858802814975, |
|
"learning_rate": 4.758064516129032e-07, |
|
"logits/generated": -1.7914602756500244, |
|
"logits/oppo_generated": -2.698634147644043, |
|
"logits/oppo_real": -2.8618617057800293, |
|
"logits/real": -1.7156788110733032, |
|
"logps/generated": -337.7628479003906, |
|
"logps/oppo_gen": -66.55247497558594, |
|
"logps/oppo_real": -330.7273254394531, |
|
"logps/real": -382.69952392578125, |
|
"loss": -1.1736, |
|
"loss/gen": 0.5937217473983765, |
|
"loss/real": -1.4802782535552979, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -271.21038818359375, |
|
"rewards/margins": 219.2382049560547, |
|
"rewards/real": -51.9721794128418, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 63.84164674704906, |
|
"learning_rate": 4.753584229390681e-07, |
|
"logits/generated": -1.484118103981018, |
|
"logits/oppo_generated": -2.6399593353271484, |
|
"logits/oppo_real": -2.7412514686584473, |
|
"logits/real": -1.6595765352249146, |
|
"logps/generated": -318.4638366699219, |
|
"logps/oppo_gen": -71.26600646972656, |
|
"logps/oppo_real": -342.77703857421875, |
|
"logps/real": -332.076416015625, |
|
"loss": -1.0251, |
|
"loss/gen": 1.2006943225860596, |
|
"loss/real": -2.107006311416626, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -247.19781494140625, |
|
"rewards/margins": 257.8984680175781, |
|
"rewards/real": 10.700631141662598, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 75.05080961213876, |
|
"learning_rate": 4.749103942652329e-07, |
|
"logits/generated": -1.6428945064544678, |
|
"logits/oppo_generated": -2.657951831817627, |
|
"logits/oppo_real": -2.851677656173706, |
|
"logits/real": -1.653546929359436, |
|
"logps/generated": -400.077880859375, |
|
"logps/oppo_gen": -85.37565612792969, |
|
"logps/oppo_real": -318.65338134765625, |
|
"logps/real": -342.5865173339844, |
|
"loss": -1.161, |
|
"loss/gen": 0.37989306449890137, |
|
"loss/real": -1.7606685161590576, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -314.70220947265625, |
|
"rewards/margins": 290.7690734863281, |
|
"rewards/real": -23.933155059814453, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 46.01563826400488, |
|
"learning_rate": 4.7446236559139785e-07, |
|
"logits/generated": -1.8539488315582275, |
|
"logits/oppo_generated": -2.7544326782226562, |
|
"logits/oppo_real": -2.9937453269958496, |
|
"logits/real": -1.6929676532745361, |
|
"logps/generated": -571.8555908203125, |
|
"logps/oppo_gen": -91.8690185546875, |
|
"logps/oppo_real": -138.0150604248047, |
|
"logps/real": -236.94998168945312, |
|
"loss": -1.303, |
|
"loss/gen": 0.19529122114181519, |
|
"loss/real": -1.010650634765625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -479.98651123046875, |
|
"rewards/margins": 381.0516357421875, |
|
"rewards/real": -98.93492889404297, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 108.21823112061571, |
|
"learning_rate": 4.740143369175627e-07, |
|
"logits/generated": -1.5786761045455933, |
|
"logits/oppo_generated": -2.937568187713623, |
|
"logits/oppo_real": -2.910910129547119, |
|
"logits/real": -2.0116238594055176, |
|
"logps/generated": -319.71612548828125, |
|
"logps/oppo_gen": -72.81363677978516, |
|
"logps/oppo_real": -349.1295166015625, |
|
"logps/real": -343.4236145019531, |
|
"loss": -1.2469, |
|
"loss/gen": 1.134260654449463, |
|
"loss/real": -2.0570592880249023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -246.9025115966797, |
|
"rewards/margins": 252.60841369628906, |
|
"rewards/real": 5.705905914306641, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 61.41563819772921, |
|
"learning_rate": 4.7356630824372756e-07, |
|
"logits/generated": -1.6027933359146118, |
|
"logits/oppo_generated": -2.8456101417541504, |
|
"logits/oppo_real": -2.7031779289245605, |
|
"logits/real": -1.860973596572876, |
|
"logps/generated": -398.1639404296875, |
|
"logps/oppo_gen": -74.38111114501953, |
|
"logps/oppo_real": -210.15036010742188, |
|
"logps/real": -244.78244018554688, |
|
"loss": -1.2393, |
|
"loss/gen": 0.38152068853378296, |
|
"loss/real": -1.653679370880127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -323.7828369140625, |
|
"rewards/margins": 289.1507568359375, |
|
"rewards/real": -34.63207244873047, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 155.75063954523588, |
|
"learning_rate": 4.731182795698925e-07, |
|
"logits/generated": -1.7713713645935059, |
|
"logits/oppo_generated": -2.8624844551086426, |
|
"logits/oppo_real": -3.1369876861572266, |
|
"logits/real": -1.7838329076766968, |
|
"logps/generated": -434.6895446777344, |
|
"logps/oppo_gen": -89.36515808105469, |
|
"logps/oppo_real": -372.01629638671875, |
|
"logps/real": -403.449462890625, |
|
"loss": -1.0078, |
|
"loss/gen": 0.25918668508529663, |
|
"loss/real": -1.6856684684753418, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -345.32440185546875, |
|
"rewards/margins": 313.8912353515625, |
|
"rewards/real": -31.433147430419922, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 110.75212945892959, |
|
"learning_rate": 4.726702508960573e-07, |
|
"logits/generated": -1.7238816022872925, |
|
"logits/oppo_generated": -2.851134777069092, |
|
"logits/oppo_real": -2.806102752685547, |
|
"logits/real": -1.8398982286453247, |
|
"logps/generated": -455.3783874511719, |
|
"logps/oppo_gen": -102.20521545410156, |
|
"logps/oppo_real": -303.71771240234375, |
|
"logps/real": -384.83721923828125, |
|
"loss": -1.2778, |
|
"loss/gen": 0.28883445262908936, |
|
"loss/real": -1.1888045072555542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -353.17315673828125, |
|
"rewards/margins": 272.05364990234375, |
|
"rewards/real": -81.11953735351562, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 208.35696532633605, |
|
"learning_rate": 4.722222222222222e-07, |
|
"logits/generated": -1.8423570394515991, |
|
"logits/oppo_generated": -2.7869691848754883, |
|
"logits/oppo_real": -2.934145212173462, |
|
"logits/real": -1.8845537900924683, |
|
"logps/generated": -446.48486328125, |
|
"logps/oppo_gen": -118.00005340576172, |
|
"logps/oppo_real": -354.1058654785156, |
|
"logps/real": -363.6239013671875, |
|
"loss": -0.9829, |
|
"loss/gen": 0.3972959518432617, |
|
"loss/real": -1.904819130897522, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -328.4848327636719, |
|
"rewards/margins": 318.96673583984375, |
|
"rewards/real": -9.518078804016113, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 172.94304517672862, |
|
"learning_rate": 4.717741935483871e-07, |
|
"logits/generated": -1.7597293853759766, |
|
"logits/oppo_generated": -2.940918445587158, |
|
"logits/oppo_real": -2.8966355323791504, |
|
"logits/real": -2.1739630699157715, |
|
"logps/generated": -335.61669921875, |
|
"logps/oppo_gen": -59.739017486572266, |
|
"logps/oppo_real": -344.7768249511719, |
|
"logps/real": -356.95220947265625, |
|
"loss": -1.2174, |
|
"loss/gen": 0.9002517461776733, |
|
"loss/real": -1.8782463073730469, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -275.877685546875, |
|
"rewards/margins": 263.70233154296875, |
|
"rewards/real": -12.175359725952148, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 132.76997113193556, |
|
"learning_rate": 4.7132616487455197e-07, |
|
"logits/generated": -1.8769468069076538, |
|
"logits/oppo_generated": -2.7915775775909424, |
|
"logits/oppo_real": -3.1147103309631348, |
|
"logits/real": -1.7647349834442139, |
|
"logps/generated": -427.8299255371094, |
|
"logps/oppo_gen": -102.62004089355469, |
|
"logps/oppo_real": -260.40576171875, |
|
"logps/real": -333.41436767578125, |
|
"loss": -1.182, |
|
"loss/gen": 0.3168797492980957, |
|
"loss/real": -1.269913911819458, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -325.20989990234375, |
|
"rewards/margins": 252.20127868652344, |
|
"rewards/real": -73.00860595703125, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 63.105612740666736, |
|
"learning_rate": 4.708781362007168e-07, |
|
"logits/generated": -1.6452438831329346, |
|
"logits/oppo_generated": -2.487020492553711, |
|
"logits/oppo_real": -2.6723856925964355, |
|
"logits/real": -1.5103099346160889, |
|
"logps/generated": -434.71832275390625, |
|
"logps/oppo_gen": -165.93902587890625, |
|
"logps/oppo_real": -273.2274169921875, |
|
"logps/real": -338.7171630859375, |
|
"loss": -1.2914, |
|
"loss/gen": 1.2655643224716187, |
|
"loss/real": -1.3451025485992432, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -268.7793273925781, |
|
"rewards/margins": 203.28956604003906, |
|
"rewards/real": -65.48976135253906, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 108.93035063315688, |
|
"learning_rate": 4.7043010752688173e-07, |
|
"logits/generated": -1.6793558597564697, |
|
"logits/oppo_generated": -2.73710560798645, |
|
"logits/oppo_real": -2.767047166824341, |
|
"logits/real": -1.719926357269287, |
|
"logps/generated": -647.4390258789062, |
|
"logps/oppo_gen": -92.0302963256836, |
|
"logps/oppo_real": -215.4584503173828, |
|
"logps/real": -281.78326416015625, |
|
"loss": -1.2024, |
|
"loss/gen": 0.2647426724433899, |
|
"loss/real": -1.336751937866211, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -555.40869140625, |
|
"rewards/margins": 489.08392333984375, |
|
"rewards/real": -66.32481384277344, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 59.14760526838238, |
|
"learning_rate": 4.6998207885304656e-07, |
|
"logits/generated": -1.0961192846298218, |
|
"logits/oppo_generated": -2.6469738483428955, |
|
"logits/oppo_real": -2.294445037841797, |
|
"logits/real": -1.6948471069335938, |
|
"logps/generated": -425.1643981933594, |
|
"logps/oppo_gen": -108.79867553710938, |
|
"logps/oppo_real": -322.5262756347656, |
|
"logps/real": -321.787841796875, |
|
"loss": -1.324, |
|
"loss/gen": 0.8741945028305054, |
|
"loss/real": -2.0073840618133545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -316.36572265625, |
|
"rewards/margins": 317.1040954589844, |
|
"rewards/real": 0.7384042739868164, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 168.3731266135573, |
|
"learning_rate": 4.6953405017921144e-07, |
|
"logits/generated": -1.9429898262023926, |
|
"logits/oppo_generated": -2.5572714805603027, |
|
"logits/oppo_real": -2.9609758853912354, |
|
"logits/real": -1.4800224304199219, |
|
"logps/generated": -361.43359375, |
|
"logps/oppo_gen": -79.15040588378906, |
|
"logps/oppo_real": -370.16033935546875, |
|
"logps/real": -355.4413757324219, |
|
"loss": -1.2951, |
|
"loss/gen": 0.8757161498069763, |
|
"loss/real": -2.147189140319824, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -282.2831726074219, |
|
"rewards/margins": 297.0021057128906, |
|
"rewards/real": 14.718932151794434, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 85.13344591452439, |
|
"learning_rate": 4.690860215053763e-07, |
|
"logits/generated": -1.8185949325561523, |
|
"logits/oppo_generated": -2.818962335586548, |
|
"logits/oppo_real": -2.974072217941284, |
|
"logits/real": -1.8212263584136963, |
|
"logps/generated": -416.0355224609375, |
|
"logps/oppo_gen": -87.5977783203125, |
|
"logps/oppo_real": -245.32896423339844, |
|
"logps/real": -231.43038940429688, |
|
"loss": -1.3389, |
|
"loss/gen": 0.3807252049446106, |
|
"loss/real": -2.1389856338500977, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -328.437744140625, |
|
"rewards/margins": 342.3363037109375, |
|
"rewards/real": 13.898568153381348, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 83.88629301769151, |
|
"learning_rate": 4.686379928315412e-07, |
|
"logits/generated": -1.616201400756836, |
|
"logits/oppo_generated": -2.633566379547119, |
|
"logits/oppo_real": -2.6259918212890625, |
|
"logits/real": -1.8099052906036377, |
|
"logps/generated": -367.63824462890625, |
|
"logps/oppo_gen": -84.75750732421875, |
|
"logps/oppo_real": -315.4161376953125, |
|
"logps/real": -310.99310302734375, |
|
"loss": -1.3142, |
|
"loss/gen": 1.0110325813293457, |
|
"loss/real": -2.0442302227020264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -282.8807373046875, |
|
"rewards/margins": 287.30377197265625, |
|
"rewards/real": 4.4230194091796875, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 105.49686199420815, |
|
"learning_rate": 4.681899641577061e-07, |
|
"logits/generated": -1.8293168544769287, |
|
"logits/oppo_generated": -2.7601919174194336, |
|
"logits/oppo_real": -2.861198902130127, |
|
"logits/real": -1.8698251247406006, |
|
"logps/generated": -464.19207763671875, |
|
"logps/oppo_gen": -63.106407165527344, |
|
"logps/oppo_real": -254.43199157714844, |
|
"logps/real": -266.5618591308594, |
|
"loss": -1.2892, |
|
"loss/gen": 0.18153703212738037, |
|
"loss/real": -1.8787013292312622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -401.08563232421875, |
|
"rewards/margins": 388.95574951171875, |
|
"rewards/real": -12.129861831665039, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 122.08849089646002, |
|
"learning_rate": 4.677419354838709e-07, |
|
"logits/generated": -1.6834774017333984, |
|
"logits/oppo_generated": -2.9130988121032715, |
|
"logits/oppo_real": -2.7563557624816895, |
|
"logits/real": -2.015559673309326, |
|
"logps/generated": -434.40545654296875, |
|
"logps/oppo_gen": -71.0981216430664, |
|
"logps/oppo_real": -282.122314453125, |
|
"logps/real": -366.6064453125, |
|
"loss": -1.2773, |
|
"loss/gen": 0.20630814135074615, |
|
"loss/real": -1.1551584005355835, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -363.30731201171875, |
|
"rewards/margins": 278.8231506347656, |
|
"rewards/real": -84.48416137695312, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 68.69515212721528, |
|
"learning_rate": 4.6729390681003585e-07, |
|
"logits/generated": -1.648697853088379, |
|
"logits/oppo_generated": -2.9668259620666504, |
|
"logits/oppo_real": -2.745316505432129, |
|
"logits/real": -2.1867191791534424, |
|
"logps/generated": -530.75732421875, |
|
"logps/oppo_gen": -77.98722076416016, |
|
"logps/oppo_real": -298.9158020019531, |
|
"logps/real": -301.756103515625, |
|
"loss": -1.4792, |
|
"loss/gen": 0.20026695728302002, |
|
"loss/real": -1.9715969562530518, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -452.77008056640625, |
|
"rewards/margins": 449.92974853515625, |
|
"rewards/real": -2.8403053283691406, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 57.901015430565856, |
|
"learning_rate": 4.668458781362007e-07, |
|
"logits/generated": -1.9489855766296387, |
|
"logits/oppo_generated": -2.750535726547241, |
|
"logits/oppo_real": -3.053309440612793, |
|
"logits/real": -1.809377670288086, |
|
"logps/generated": -428.41241455078125, |
|
"logps/oppo_gen": -60.58064270019531, |
|
"logps/oppo_real": -289.3247985839844, |
|
"logps/real": -279.5442199707031, |
|
"loss": -1.3882, |
|
"loss/gen": 0.2120557278394699, |
|
"loss/real": -2.097805976867676, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -367.831787109375, |
|
"rewards/margins": 377.6123962402344, |
|
"rewards/real": 9.780599594116211, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 74.52388262853225, |
|
"learning_rate": 4.6639784946236556e-07, |
|
"logits/generated": -2.027777910232544, |
|
"logits/oppo_generated": -2.857908248901367, |
|
"logits/oppo_real": -2.9202375411987305, |
|
"logits/real": -2.0229392051696777, |
|
"logps/generated": -437.67413330078125, |
|
"logps/oppo_gen": -151.37307739257812, |
|
"logps/oppo_real": -296.711181640625, |
|
"logps/real": -294.72747802734375, |
|
"loss": -1.2855, |
|
"loss/gen": 1.1820727586746216, |
|
"loss/real": -2.019836902618408, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -286.301025390625, |
|
"rewards/margins": 288.28472900390625, |
|
"rewards/real": 1.983699083328247, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 117.49908576771992, |
|
"learning_rate": 4.6594982078853044e-07, |
|
"logits/generated": -1.4556838274002075, |
|
"logits/oppo_generated": -2.790827751159668, |
|
"logits/oppo_real": -2.5702054500579834, |
|
"logits/real": -1.9723587036132812, |
|
"logps/generated": -398.6170959472656, |
|
"logps/oppo_gen": -72.09220123291016, |
|
"logps/oppo_real": -381.5404357910156, |
|
"logps/real": -404.51959228515625, |
|
"loss": -1.3209, |
|
"loss/gen": 0.30479684472084045, |
|
"loss/real": -1.7702081203460693, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -326.5248718261719, |
|
"rewards/margins": 303.54571533203125, |
|
"rewards/real": -22.979171752929688, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 65.84271178583474, |
|
"learning_rate": 4.655017921146953e-07, |
|
"logits/generated": -1.4536468982696533, |
|
"logits/oppo_generated": -2.7350287437438965, |
|
"logits/oppo_real": -2.7642884254455566, |
|
"logits/real": -1.7534418106079102, |
|
"logps/generated": -564.8858642578125, |
|
"logps/oppo_gen": -75.31367492675781, |
|
"logps/oppo_real": -265.2264709472656, |
|
"logps/real": -327.7047424316406, |
|
"loss": -1.3234, |
|
"loss/gen": 0.1534099280834198, |
|
"loss/real": -1.3752171993255615, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -489.57220458984375, |
|
"rewards/margins": 427.0939025878906, |
|
"rewards/real": -62.4782829284668, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 61.410231661624394, |
|
"learning_rate": 4.6505376344086015e-07, |
|
"logits/generated": -1.8642635345458984, |
|
"logits/oppo_generated": -2.9040493965148926, |
|
"logits/oppo_real": -3.1028363704681396, |
|
"logits/real": -1.919235110282898, |
|
"logps/generated": -544.6783447265625, |
|
"logps/oppo_gen": -125.20480346679688, |
|
"logps/oppo_real": -288.18572998046875, |
|
"logps/real": -292.4162902832031, |
|
"loss": -1.5366, |
|
"loss/gen": 0.14077386260032654, |
|
"loss/real": -1.9576942920684814, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -419.47357177734375, |
|
"rewards/margins": 415.24298095703125, |
|
"rewards/real": -4.230566024780273, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 81.7004759682433, |
|
"learning_rate": 4.646057347670251e-07, |
|
"logits/generated": -1.400865077972412, |
|
"logits/oppo_generated": -2.81662917137146, |
|
"logits/oppo_real": -2.771235227584839, |
|
"logits/real": -1.9147589206695557, |
|
"logps/generated": -483.5452880859375, |
|
"logps/oppo_gen": -75.8317642211914, |
|
"logps/oppo_real": -309.955078125, |
|
"logps/real": -345.3260498046875, |
|
"loss": -1.477, |
|
"loss/gen": 0.14227358996868134, |
|
"loss/real": -1.6462900638580322, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -407.7135009765625, |
|
"rewards/margins": 372.342529296875, |
|
"rewards/real": -35.37098693847656, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 76.67643945298647, |
|
"learning_rate": 4.641577060931899e-07, |
|
"logits/generated": -1.515089511871338, |
|
"logits/oppo_generated": -2.6174123287200928, |
|
"logits/oppo_real": -2.623584747314453, |
|
"logits/real": -1.833693265914917, |
|
"logps/generated": -522.1652221679688, |
|
"logps/oppo_gen": -89.05635070800781, |
|
"logps/oppo_real": -326.333251953125, |
|
"logps/real": -330.11474609375, |
|
"loss": -1.4459, |
|
"loss/gen": 0.16859720647335052, |
|
"loss/real": -1.9621846675872803, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -433.10888671875, |
|
"rewards/margins": 429.32733154296875, |
|
"rewards/real": -3.7815260887145996, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 82.81281249941038, |
|
"learning_rate": 4.637096774193548e-07, |
|
"logits/generated": -1.8157904148101807, |
|
"logits/oppo_generated": -3.0457491874694824, |
|
"logits/oppo_real": -2.8690385818481445, |
|
"logits/real": -2.337892532348633, |
|
"logps/generated": -435.04327392578125, |
|
"logps/oppo_gen": -73.73023986816406, |
|
"logps/oppo_real": -327.18359375, |
|
"logps/real": -325.4014892578125, |
|
"loss": -1.5047, |
|
"loss/gen": 0.7044680118560791, |
|
"loss/real": -2.0178208351135254, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -361.31298828125, |
|
"rewards/margins": 363.0950927734375, |
|
"rewards/real": 1.7820682525634766, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 125.18439524357659, |
|
"learning_rate": 4.6326164874551973e-07, |
|
"logits/generated": -1.8496112823486328, |
|
"logits/oppo_generated": -2.835294246673584, |
|
"logits/oppo_real": -2.8998498916625977, |
|
"logits/real": -2.04846453666687, |
|
"logps/generated": -464.38885498046875, |
|
"logps/oppo_gen": -70.26353454589844, |
|
"logps/oppo_real": -340.30975341796875, |
|
"logps/real": -354.45721435546875, |
|
"loss": -1.4469, |
|
"loss/gen": 0.40815508365631104, |
|
"loss/real": -1.858525276184082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -394.12530517578125, |
|
"rewards/margins": 379.97784423828125, |
|
"rewards/real": -14.14747428894043, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 49.26206179629386, |
|
"learning_rate": 4.6281362007168456e-07, |
|
"logits/generated": -1.7525596618652344, |
|
"logits/oppo_generated": -2.7462942600250244, |
|
"logits/oppo_real": -2.756624221801758, |
|
"logits/real": -1.8544926643371582, |
|
"logps/generated": -438.41241455078125, |
|
"logps/oppo_gen": -43.12284851074219, |
|
"logps/oppo_real": -88.672607421875, |
|
"logps/real": -175.98345947265625, |
|
"loss": -1.4557, |
|
"loss/gen": 0.6103305816650391, |
|
"loss/real": -1.1268913745880127, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -395.28961181640625, |
|
"rewards/margins": 307.978759765625, |
|
"rewards/real": -87.31085968017578, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 61.709413850825875, |
|
"learning_rate": 4.6236559139784944e-07, |
|
"logits/generated": -1.9703348875045776, |
|
"logits/oppo_generated": -3.2303848266601562, |
|
"logits/oppo_real": -3.089721918106079, |
|
"logits/real": -2.517977714538574, |
|
"logps/generated": -519.572021484375, |
|
"logps/oppo_gen": -85.11558532714844, |
|
"logps/oppo_real": -363.27288818359375, |
|
"logps/real": -372.19915771484375, |
|
"loss": -1.6645, |
|
"loss/gen": 0.11303215473890305, |
|
"loss/real": -1.9107370376586914, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -434.4564208984375, |
|
"rewards/margins": 425.5301513671875, |
|
"rewards/real": -8.92629623413086, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 88.81122448063645, |
|
"learning_rate": 4.619175627240143e-07, |
|
"logits/generated": -1.938302993774414, |
|
"logits/oppo_generated": -2.8577804565429688, |
|
"logits/oppo_real": -2.9372658729553223, |
|
"logits/real": -2.0496668815612793, |
|
"logps/generated": -748.061767578125, |
|
"logps/oppo_gen": -77.513916015625, |
|
"logps/oppo_real": -263.41583251953125, |
|
"logps/real": -329.33160400390625, |
|
"loss": -1.5934, |
|
"loss/gen": 0.13084131479263306, |
|
"loss/real": -1.3408421277999878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -670.5478515625, |
|
"rewards/margins": 604.6320190429688, |
|
"rewards/real": -65.91577911376953, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 83.68007225252317, |
|
"learning_rate": 4.614695340501792e-07, |
|
"logits/generated": -1.7787394523620605, |
|
"logits/oppo_generated": -2.7984981536865234, |
|
"logits/oppo_real": -2.694584369659424, |
|
"logits/real": -2.0291075706481934, |
|
"logps/generated": -396.68927001953125, |
|
"logps/oppo_gen": -49.8719596862793, |
|
"logps/oppo_real": -201.35671997070312, |
|
"logps/real": -242.06234741210938, |
|
"loss": -1.5737, |
|
"loss/gen": 0.6225491762161255, |
|
"loss/real": -1.5929436683654785, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -346.81732177734375, |
|
"rewards/margins": 306.1116943359375, |
|
"rewards/real": -40.70562744140625, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 66.71267170790156, |
|
"learning_rate": 4.6102150537634403e-07, |
|
"logits/generated": -2.229249954223633, |
|
"logits/oppo_generated": -2.8614678382873535, |
|
"logits/oppo_real": -3.104336738586426, |
|
"logits/real": -2.1488466262817383, |
|
"logps/generated": -631.5731201171875, |
|
"logps/oppo_gen": -65.24995422363281, |
|
"logps/oppo_real": -279.1671142578125, |
|
"logps/real": -288.8418273925781, |
|
"loss": -1.5765, |
|
"loss/gen": 0.17149776220321655, |
|
"loss/real": -1.9032527208328247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -566.3231201171875, |
|
"rewards/margins": 556.6484375, |
|
"rewards/real": -9.67473030090332, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 169.06562497111884, |
|
"learning_rate": 4.6057347670250897e-07, |
|
"logits/generated": -1.8923313617706299, |
|
"logits/oppo_generated": -2.743807554244995, |
|
"logits/oppo_real": -2.8210201263427734, |
|
"logits/real": -2.0257139205932617, |
|
"logps/generated": -499.35198974609375, |
|
"logps/oppo_gen": -89.97515869140625, |
|
"logps/oppo_real": -332.08160400390625, |
|
"logps/real": -333.4979248046875, |
|
"loss": -1.5969, |
|
"loss/gen": 0.5354217886924744, |
|
"loss/real": -1.9858367443084717, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -409.3768615722656, |
|
"rewards/margins": 407.96051025390625, |
|
"rewards/real": -1.4163341522216797, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 46.82037963556842, |
|
"learning_rate": 4.601254480286738e-07, |
|
"logits/generated": -1.4846677780151367, |
|
"logits/oppo_generated": -2.88552188873291, |
|
"logits/oppo_real": -2.462414264678955, |
|
"logits/real": -2.096205472946167, |
|
"logps/generated": -536.2479248046875, |
|
"logps/oppo_gen": -83.80229949951172, |
|
"logps/oppo_real": -202.01084899902344, |
|
"logps/real": -235.54244995117188, |
|
"loss": -1.6622, |
|
"loss/gen": 0.28287458419799805, |
|
"loss/real": -1.6646840572357178, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -452.4456481933594, |
|
"rewards/margins": 418.9140625, |
|
"rewards/real": -33.531593322753906, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 62.38898539042669, |
|
"learning_rate": 4.596774193548387e-07, |
|
"logits/generated": -1.2318034172058105, |
|
"logits/oppo_generated": -2.284450054168701, |
|
"logits/oppo_real": -2.3912582397460938, |
|
"logits/real": -1.4682029485702515, |
|
"logps/generated": -635.2818603515625, |
|
"logps/oppo_gen": -65.28082275390625, |
|
"logps/oppo_real": -298.7229919433594, |
|
"logps/real": -261.49755859375, |
|
"loss": -1.6128, |
|
"loss/gen": 0.3564888834953308, |
|
"loss/real": -2.3722541332244873, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -570.0010986328125, |
|
"rewards/margins": 607.2265625, |
|
"rewards/real": 37.22542953491211, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 125.28287523889688, |
|
"learning_rate": 4.5922939068100356e-07, |
|
"logits/generated": -1.7798149585723877, |
|
"logits/oppo_generated": -2.862884044647217, |
|
"logits/oppo_real": -3.036806344985962, |
|
"logits/real": -1.983008861541748, |
|
"logps/generated": -748.7784423828125, |
|
"logps/oppo_gen": -90.15806579589844, |
|
"logps/oppo_real": -274.20635986328125, |
|
"logps/real": -275.6498718261719, |
|
"loss": -1.5349, |
|
"loss/gen": 0.04587027058005333, |
|
"loss/real": -1.985565185546875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -658.620361328125, |
|
"rewards/margins": 657.1768798828125, |
|
"rewards/real": -1.4434819221496582, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 66.76624114798773, |
|
"learning_rate": 4.5878136200716844e-07, |
|
"logits/generated": -1.8081977367401123, |
|
"logits/oppo_generated": -2.962895393371582, |
|
"logits/oppo_real": -2.877319812774658, |
|
"logits/real": -2.0002503395080566, |
|
"logps/generated": -730.475341796875, |
|
"logps/oppo_gen": -100.83236694335938, |
|
"logps/oppo_real": -181.53245544433594, |
|
"logps/real": -265.831298828125, |
|
"loss": -1.5555, |
|
"loss/gen": 0.8545611500740051, |
|
"loss/real": -1.1570115089416504, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -629.6429443359375, |
|
"rewards/margins": 545.3441162109375, |
|
"rewards/real": -84.29884338378906, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 58.158852709088926, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"logits/generated": -1.9816755056381226, |
|
"logits/oppo_generated": -2.6472039222717285, |
|
"logits/oppo_real": -2.741997241973877, |
|
"logits/real": -1.9651538133621216, |
|
"logps/generated": -828.5115356445312, |
|
"logps/oppo_gen": -144.51702880859375, |
|
"logps/oppo_real": -452.9317626953125, |
|
"logps/real": -480.84735107421875, |
|
"loss": -1.7693, |
|
"loss/gen": 0.09539352357387543, |
|
"loss/real": -1.720844030380249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -683.9945068359375, |
|
"rewards/margins": 656.0789184570312, |
|
"rewards/real": -27.915592193603516, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 90.54872868033543, |
|
"learning_rate": 4.578853046594982e-07, |
|
"logits/generated": -2.1033763885498047, |
|
"logits/oppo_generated": -2.739530086517334, |
|
"logits/oppo_real": -2.9463746547698975, |
|
"logits/real": -2.074946880340576, |
|
"logps/generated": -545.0913696289062, |
|
"logps/oppo_gen": -90.12626647949219, |
|
"logps/oppo_real": -418.7986755371094, |
|
"logps/real": -383.2454528808594, |
|
"loss": -1.6797, |
|
"loss/gen": 0.09754064679145813, |
|
"loss/real": -2.35553240776062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -454.965087890625, |
|
"rewards/margins": 490.51837158203125, |
|
"rewards/real": 35.55324935913086, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 83.66236669831007, |
|
"learning_rate": 4.574372759856631e-07, |
|
"logits/generated": -1.986171007156372, |
|
"logits/oppo_generated": -2.7536940574645996, |
|
"logits/oppo_real": -3.0076608657836914, |
|
"logits/real": -2.0253145694732666, |
|
"logps/generated": -483.1329345703125, |
|
"logps/oppo_gen": -57.10042190551758, |
|
"logps/oppo_real": -238.064697265625, |
|
"logps/real": -238.10501098632812, |
|
"loss": -1.6896, |
|
"loss/gen": 0.1678832322359085, |
|
"loss/real": -1.9995965957641602, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -426.032470703125, |
|
"rewards/margins": 425.9921569824219, |
|
"rewards/real": -0.04033064842224121, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 67.85836942324248, |
|
"learning_rate": 4.569892473118279e-07, |
|
"logits/generated": -1.7882883548736572, |
|
"logits/oppo_generated": -2.4256725311279297, |
|
"logits/oppo_real": -2.8077471256256104, |
|
"logits/real": -1.5553542375564575, |
|
"logps/generated": -575.3634643554688, |
|
"logps/oppo_gen": -58.635196685791016, |
|
"logps/oppo_real": -250.21864318847656, |
|
"logps/real": -216.97225952148438, |
|
"loss": -1.7029, |
|
"loss/gen": 0.1162588894367218, |
|
"loss/real": -2.3324639797210693, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -516.728271484375, |
|
"rewards/margins": 549.974609375, |
|
"rewards/real": 33.24639892578125, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 73.62041051060501, |
|
"learning_rate": 4.5654121863799285e-07, |
|
"logits/generated": -1.89347505569458, |
|
"logits/oppo_generated": -2.624286651611328, |
|
"logits/oppo_real": -2.8371405601501465, |
|
"logits/real": -1.9667630195617676, |
|
"logps/generated": -548.8859252929688, |
|
"logps/oppo_gen": -106.68203735351562, |
|
"logps/oppo_real": -485.33148193359375, |
|
"logps/real": -392.25909423828125, |
|
"loss": -1.6779, |
|
"loss/gen": 0.1170816719532013, |
|
"loss/real": -2.9307241439819336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -442.203857421875, |
|
"rewards/margins": 535.2762451171875, |
|
"rewards/real": 93.0723876953125, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 82.21296242932893, |
|
"learning_rate": 4.560931899641577e-07, |
|
"logits/generated": -1.917464256286621, |
|
"logits/oppo_generated": -2.9165024757385254, |
|
"logits/oppo_real": -2.8914356231689453, |
|
"logits/real": -2.142932415008545, |
|
"logps/generated": -1235.953369140625, |
|
"logps/oppo_gen": -83.79830169677734, |
|
"logps/oppo_real": -193.01220703125, |
|
"logps/real": -226.86917114257812, |
|
"loss": -1.7045, |
|
"loss/gen": 0.0816095620393753, |
|
"loss/real": -1.6614303588867188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1152.155029296875, |
|
"rewards/margins": 1118.2979736328125, |
|
"rewards/real": -33.856971740722656, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 54.73798339518798, |
|
"learning_rate": 4.5564516129032256e-07, |
|
"logits/generated": -2.137822389602661, |
|
"logits/oppo_generated": -2.9398818016052246, |
|
"logits/oppo_real": -3.185572624206543, |
|
"logits/real": -2.284759044647217, |
|
"logps/generated": -675.8824462890625, |
|
"logps/oppo_gen": -117.77006530761719, |
|
"logps/oppo_real": -377.13311767578125, |
|
"logps/real": -368.2445373535156, |
|
"loss": -1.8385, |
|
"loss/gen": 0.03665899857878685, |
|
"loss/real": -2.08888578414917, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -558.1124267578125, |
|
"rewards/margins": 567.0009765625, |
|
"rewards/real": 8.888594627380371, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 63.7378626619904, |
|
"learning_rate": 4.5519713261648744e-07, |
|
"logits/generated": -1.9477636814117432, |
|
"logits/oppo_generated": -2.771664619445801, |
|
"logits/oppo_real": -2.8211355209350586, |
|
"logits/real": -2.0558881759643555, |
|
"logps/generated": -591.1102294921875, |
|
"logps/oppo_gen": -88.96675109863281, |
|
"logps/oppo_real": -326.7928466796875, |
|
"logps/real": -351.2818603515625, |
|
"loss": -1.7565, |
|
"loss/gen": 0.07419906556606293, |
|
"loss/real": -1.7551099061965942, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -502.1435241699219, |
|
"rewards/margins": 477.6545104980469, |
|
"rewards/real": -24.489017486572266, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 102.74617921728282, |
|
"learning_rate": 4.547491039426523e-07, |
|
"logits/generated": -1.9030685424804688, |
|
"logits/oppo_generated": -2.7817769050598145, |
|
"logits/oppo_real": -2.727473258972168, |
|
"logits/real": -2.137336492538452, |
|
"logps/generated": -511.839599609375, |
|
"logps/oppo_gen": -66.81544494628906, |
|
"logps/oppo_real": -323.5364685058594, |
|
"logps/real": -293.64990234375, |
|
"loss": -1.7376, |
|
"loss/gen": 0.10144515335559845, |
|
"loss/real": -2.29886531829834, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -445.0241394042969, |
|
"rewards/margins": 474.9106750488281, |
|
"rewards/real": 29.886547088623047, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 72.97101508797003, |
|
"learning_rate": 4.5430107526881715e-07, |
|
"logits/generated": -2.1376824378967285, |
|
"logits/oppo_generated": -2.7992939949035645, |
|
"logits/oppo_real": -2.914294719696045, |
|
"logits/real": -2.1265478134155273, |
|
"logps/generated": -582.9034423828125, |
|
"logps/oppo_gen": -68.68360900878906, |
|
"logps/oppo_real": -253.99221801757812, |
|
"logps/real": -252.176025390625, |
|
"loss": -1.7525, |
|
"loss/gen": 0.07931329309940338, |
|
"loss/real": -2.018162250518799, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -514.2198486328125, |
|
"rewards/margins": 516.0361328125, |
|
"rewards/real": 1.8162250518798828, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 84.03182525835078, |
|
"learning_rate": 4.538530465949821e-07, |
|
"logits/generated": -1.9512498378753662, |
|
"logits/oppo_generated": -2.6728546619415283, |
|
"logits/oppo_real": -2.74894380569458, |
|
"logits/real": -1.980553388595581, |
|
"logps/generated": -495.83770751953125, |
|
"logps/oppo_gen": -80.91419982910156, |
|
"logps/oppo_real": -178.15316772460938, |
|
"logps/real": -180.73532104492188, |
|
"loss": -1.6561, |
|
"loss/gen": 0.21434536576271057, |
|
"loss/real": -1.9741783142089844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -414.92352294921875, |
|
"rewards/margins": 412.34136962890625, |
|
"rewards/real": -2.5821542739868164, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 122.909519566224, |
|
"learning_rate": 4.534050179211469e-07, |
|
"logits/generated": -1.907271385192871, |
|
"logits/oppo_generated": -2.649775981903076, |
|
"logits/oppo_real": -2.8270368576049805, |
|
"logits/real": -1.920291543006897, |
|
"logps/generated": -488.8565673828125, |
|
"logps/oppo_gen": -64.18344116210938, |
|
"logps/oppo_real": -235.9340057373047, |
|
"logps/real": -287.48065185546875, |
|
"loss": -1.602, |
|
"loss/gen": 0.21500566601753235, |
|
"loss/real": -1.4845335483551025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -424.673095703125, |
|
"rewards/margins": 373.12646484375, |
|
"rewards/real": -51.5466423034668, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 96.94029553512564, |
|
"learning_rate": 4.529569892473118e-07, |
|
"logits/generated": -2.026323080062866, |
|
"logits/oppo_generated": -2.943478584289551, |
|
"logits/oppo_real": -2.859900951385498, |
|
"logits/real": -2.257244825363159, |
|
"logps/generated": -603.081787109375, |
|
"logps/oppo_gen": -85.38736724853516, |
|
"logps/oppo_real": -270.22747802734375, |
|
"logps/real": -247.45028686523438, |
|
"loss": -1.6835, |
|
"loss/gen": 0.0911005511879921, |
|
"loss/real": -2.2277719974517822, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -517.6943969726562, |
|
"rewards/margins": 540.4715576171875, |
|
"rewards/real": 22.777204513549805, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 65.43579495749059, |
|
"learning_rate": 4.5250896057347673e-07, |
|
"logits/generated": -2.3204407691955566, |
|
"logits/oppo_generated": -2.9215641021728516, |
|
"logits/oppo_real": -3.185364246368408, |
|
"logits/real": -2.2590651512145996, |
|
"logps/generated": -651.804443359375, |
|
"logps/oppo_gen": -160.45762634277344, |
|
"logps/oppo_real": -471.71771240234375, |
|
"logps/real": -498.5760803222656, |
|
"loss": -1.7785, |
|
"loss/gen": 0.08252400159835815, |
|
"loss/real": -1.7314162254333496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -491.3468322753906, |
|
"rewards/margins": 464.48846435546875, |
|
"rewards/real": -26.858369827270508, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 97.98889346343033, |
|
"learning_rate": 4.5206093189964156e-07, |
|
"logits/generated": -2.0279428958892822, |
|
"logits/oppo_generated": -2.8301095962524414, |
|
"logits/oppo_real": -2.9588708877563477, |
|
"logits/real": -1.925227403640747, |
|
"logps/generated": -647.3221435546875, |
|
"logps/oppo_gen": -65.44461059570312, |
|
"logps/oppo_real": -184.59007263183594, |
|
"logps/real": -213.09719848632812, |
|
"loss": -1.7926, |
|
"loss/gen": 0.09245370328426361, |
|
"loss/real": -1.7149286270141602, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -581.87744140625, |
|
"rewards/margins": 553.3703002929688, |
|
"rewards/real": -28.50714111328125, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 101.17580711470751, |
|
"learning_rate": 4.5161290322580644e-07, |
|
"logits/generated": -1.8266165256500244, |
|
"logits/oppo_generated": -3.001574993133545, |
|
"logits/oppo_real": -2.8634276390075684, |
|
"logits/real": -2.2667245864868164, |
|
"logps/generated": -614.9871215820312, |
|
"logps/oppo_gen": -83.24380493164062, |
|
"logps/oppo_real": -339.7986755371094, |
|
"logps/real": -321.4783935546875, |
|
"loss": -1.7798, |
|
"loss/gen": 0.04210636392235756, |
|
"loss/real": -2.1832029819488525, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -531.7432861328125, |
|
"rewards/margins": 550.0635986328125, |
|
"rewards/real": 18.320310592651367, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 125.15491182180986, |
|
"learning_rate": 4.511648745519713e-07, |
|
"logits/generated": -1.8712575435638428, |
|
"logits/oppo_generated": -2.7472705841064453, |
|
"logits/oppo_real": -2.8078293800354004, |
|
"logits/real": -1.9940369129180908, |
|
"logps/generated": -590.0716552734375, |
|
"logps/oppo_gen": -87.50840759277344, |
|
"logps/oppo_real": -388.77752685546875, |
|
"logps/real": -361.9477233886719, |
|
"loss": -1.8277, |
|
"loss/gen": 0.14459219574928284, |
|
"loss/real": -2.2682981491088867, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -502.56329345703125, |
|
"rewards/margins": 529.39306640625, |
|
"rewards/real": 26.829811096191406, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 130.95518973584055, |
|
"learning_rate": 4.507168458781362e-07, |
|
"logits/generated": -1.9332165718078613, |
|
"logits/oppo_generated": -2.7329964637756348, |
|
"logits/oppo_real": -2.917022705078125, |
|
"logits/real": -1.8709020614624023, |
|
"logps/generated": -633.012939453125, |
|
"logps/oppo_gen": -51.14801788330078, |
|
"logps/oppo_real": -223.8541717529297, |
|
"logps/real": -240.81948852539062, |
|
"loss": -1.7284, |
|
"loss/gen": 0.03336421027779579, |
|
"loss/real": -1.830346703529358, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -581.8649291992188, |
|
"rewards/margins": 564.8995971679688, |
|
"rewards/real": -16.965333938598633, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 72.55144659029033, |
|
"learning_rate": 4.5026881720430103e-07, |
|
"logits/generated": -1.6202969551086426, |
|
"logits/oppo_generated": -2.933967113494873, |
|
"logits/oppo_real": -2.779536247253418, |
|
"logits/real": -2.1590824127197266, |
|
"logps/generated": -601.2861328125, |
|
"logps/oppo_gen": -59.4964599609375, |
|
"logps/oppo_real": -305.5668029785156, |
|
"logps/real": -321.80731201171875, |
|
"loss": -1.6793, |
|
"loss/gen": 0.09511305391788483, |
|
"loss/real": -1.8375946283340454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -541.7896728515625, |
|
"rewards/margins": 525.5491333007812, |
|
"rewards/real": -16.240537643432617, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 60.15745737424725, |
|
"learning_rate": 4.4982078853046596e-07, |
|
"logits/generated": -1.8742468357086182, |
|
"logits/oppo_generated": -2.7897162437438965, |
|
"logits/oppo_real": -2.9050936698913574, |
|
"logits/real": -1.9126145839691162, |
|
"logps/generated": -610.4820556640625, |
|
"logps/oppo_gen": -68.6431884765625, |
|
"logps/oppo_real": -279.5192565917969, |
|
"logps/real": -256.11859130859375, |
|
"loss": -1.8501, |
|
"loss/gen": 0.03932388871908188, |
|
"loss/real": -2.234006881713867, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -541.8388671875, |
|
"rewards/margins": 565.239501953125, |
|
"rewards/real": 23.400684356689453, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 51.045033636822836, |
|
"learning_rate": 4.493727598566308e-07, |
|
"logits/generated": -2.009788990020752, |
|
"logits/oppo_generated": -2.852003574371338, |
|
"logits/oppo_real": -3.0418591499328613, |
|
"logits/real": -2.0836453437805176, |
|
"logps/generated": -653.099365234375, |
|
"logps/oppo_gen": -87.85763549804688, |
|
"logps/oppo_real": -353.83575439453125, |
|
"logps/real": -326.4531555175781, |
|
"loss": -1.8601, |
|
"loss/gen": 0.03006863407790661, |
|
"loss/real": -2.2738256454467773, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -565.2417602539062, |
|
"rewards/margins": 592.6243896484375, |
|
"rewards/real": 27.38258934020996, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 38.445754639552696, |
|
"learning_rate": 4.489247311827957e-07, |
|
"logits/generated": -1.6755175590515137, |
|
"logits/oppo_generated": -2.8767549991607666, |
|
"logits/oppo_real": -2.8858747482299805, |
|
"logits/real": -2.09330677986145, |
|
"logps/generated": -611.26708984375, |
|
"logps/oppo_gen": -81.34989166259766, |
|
"logps/oppo_real": -298.17315673828125, |
|
"logps/real": -290.9176025390625, |
|
"loss": -1.8951, |
|
"loss/gen": 0.3788173794746399, |
|
"loss/real": -2.0725557804107666, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -529.917236328125, |
|
"rewards/margins": 537.1728515625, |
|
"rewards/real": 7.255581855773926, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 59.257956183906224, |
|
"learning_rate": 4.4847670250896056e-07, |
|
"logits/generated": -1.944122314453125, |
|
"logits/oppo_generated": -2.9232120513916016, |
|
"logits/oppo_real": -2.9117484092712402, |
|
"logits/real": -1.9138293266296387, |
|
"logps/generated": -694.2562255859375, |
|
"logps/oppo_gen": -70.24262237548828, |
|
"logps/oppo_real": -278.1219482421875, |
|
"logps/real": -255.45455932617188, |
|
"loss": -1.6892, |
|
"loss/gen": 0.13799193501472473, |
|
"loss/real": -2.2266738414764404, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -624.0136108398438, |
|
"rewards/margins": 646.6810302734375, |
|
"rewards/real": 22.667388916015625, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 47.609623707095714, |
|
"learning_rate": 4.4802867383512544e-07, |
|
"logits/generated": -0.9755500555038452, |
|
"logits/oppo_generated": -2.88938045501709, |
|
"logits/oppo_real": -2.7122931480407715, |
|
"logits/real": -1.8035005331039429, |
|
"logps/generated": -692.7864379882812, |
|
"logps/oppo_gen": -54.18265914916992, |
|
"logps/oppo_real": -184.002197265625, |
|
"logps/real": -166.90866088867188, |
|
"loss": -1.8166, |
|
"loss/gen": 0.05148601904511452, |
|
"loss/real": -2.1709353923797607, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -638.603759765625, |
|
"rewards/margins": 655.6973266601562, |
|
"rewards/real": 17.09354019165039, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 58.80905235311886, |
|
"learning_rate": 4.475806451612903e-07, |
|
"logits/generated": -0.8548814654350281, |
|
"logits/oppo_generated": -2.9243669509887695, |
|
"logits/oppo_real": -2.7173829078674316, |
|
"logits/real": -1.9759352207183838, |
|
"logps/generated": -570.697021484375, |
|
"logps/oppo_gen": -67.00720977783203, |
|
"logps/oppo_real": -217.82373046875, |
|
"logps/real": -219.30230712890625, |
|
"loss": -1.6971, |
|
"loss/gen": 0.40054354071617126, |
|
"loss/real": -1.9852139949798584, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -503.6898498535156, |
|
"rewards/margins": 502.21124267578125, |
|
"rewards/real": -1.4785995483398438, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 47.13598957985156, |
|
"learning_rate": 4.4713261648745515e-07, |
|
"logits/generated": -1.7058589458465576, |
|
"logits/oppo_generated": -2.831021308898926, |
|
"logits/oppo_real": -2.949223756790161, |
|
"logits/real": -1.647479772567749, |
|
"logps/generated": -702.8096923828125, |
|
"logps/oppo_gen": -56.023048400878906, |
|
"logps/oppo_real": -286.0043640136719, |
|
"logps/real": -275.82373046875, |
|
"loss": -1.8097, |
|
"loss/gen": 0.04578549787402153, |
|
"loss/real": -2.101806640625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -646.7866821289062, |
|
"rewards/margins": 656.9673461914062, |
|
"rewards/real": 10.180654525756836, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 46.640897193510604, |
|
"learning_rate": 4.466845878136201e-07, |
|
"logits/generated": -1.0372296571731567, |
|
"logits/oppo_generated": -2.9866466522216797, |
|
"logits/oppo_real": -2.8740952014923096, |
|
"logits/real": -2.0625128746032715, |
|
"logps/generated": -717.514892578125, |
|
"logps/oppo_gen": -74.18051147460938, |
|
"logps/oppo_real": -289.81561279296875, |
|
"logps/real": -262.28466796875, |
|
"loss": -1.9636, |
|
"loss/gen": 0.028024822473526, |
|
"loss/real": -2.2753095626831055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -643.3343505859375, |
|
"rewards/margins": 670.8653564453125, |
|
"rewards/real": 27.53096580505371, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 86.45016402167784, |
|
"learning_rate": 4.462365591397849e-07, |
|
"logits/generated": -1.1898678541183472, |
|
"logits/oppo_generated": -2.7843871116638184, |
|
"logits/oppo_real": -2.832613945007324, |
|
"logits/real": -1.442640781402588, |
|
"logps/generated": -915.6947631835938, |
|
"logps/oppo_gen": -64.36344909667969, |
|
"logps/oppo_real": -354.620361328125, |
|
"logps/real": -332.0544738769531, |
|
"loss": -1.9388, |
|
"loss/gen": 0.12847158312797546, |
|
"loss/real": -2.225658655166626, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -851.3313598632812, |
|
"rewards/margins": 873.897216796875, |
|
"rewards/real": 22.56588363647461, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 60.7973006602935, |
|
"learning_rate": 4.457885304659498e-07, |
|
"logits/generated": -1.986168622970581, |
|
"logits/oppo_generated": -2.730388879776001, |
|
"logits/oppo_real": -2.9984025955200195, |
|
"logits/real": -1.6806275844573975, |
|
"logps/generated": -885.4449462890625, |
|
"logps/oppo_gen": -143.77706909179688, |
|
"logps/oppo_real": -439.7186279296875, |
|
"logps/real": -421.04217529296875, |
|
"loss": -1.8178, |
|
"loss/gen": 0.028874732553958893, |
|
"loss/real": -2.1867642402648926, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -741.6678466796875, |
|
"rewards/margins": 760.34423828125, |
|
"rewards/real": 18.676427841186523, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 84.26484997557552, |
|
"learning_rate": 4.4534050179211467e-07, |
|
"logits/generated": -1.9005441665649414, |
|
"logits/oppo_generated": -2.915806770324707, |
|
"logits/oppo_real": -3.1570920944213867, |
|
"logits/real": -1.8480937480926514, |
|
"logps/generated": -750.3797607421875, |
|
"logps/oppo_gen": -118.90010070800781, |
|
"logps/oppo_real": -341.41363525390625, |
|
"logps/real": -317.5443115234375, |
|
"loss": -1.8443, |
|
"loss/gen": 0.14120692014694214, |
|
"loss/real": -2.2386932373046875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -631.4796142578125, |
|
"rewards/margins": 655.3489990234375, |
|
"rewards/real": 23.86932373046875, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 143.02717726886732, |
|
"learning_rate": 4.4489247311827955e-07, |
|
"logits/generated": -1.4650170803070068, |
|
"logits/oppo_generated": -2.837372303009033, |
|
"logits/oppo_real": -2.9020771980285645, |
|
"logits/real": -1.7127195596694946, |
|
"logps/generated": -478.16339111328125, |
|
"logps/oppo_gen": -67.94302368164062, |
|
"logps/oppo_real": -255.73797607421875, |
|
"logps/real": -229.78802490234375, |
|
"loss": -1.8535, |
|
"loss/gen": 0.3787376582622528, |
|
"loss/real": -2.2594995498657227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -410.22039794921875, |
|
"rewards/margins": 436.1703186035156, |
|
"rewards/real": 25.949939727783203, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 62.02056150042601, |
|
"learning_rate": 4.444444444444444e-07, |
|
"logits/generated": -1.8700604438781738, |
|
"logits/oppo_generated": -2.756680727005005, |
|
"logits/oppo_real": -3.0085153579711914, |
|
"logits/real": -1.58890962600708, |
|
"logps/generated": -702.9028930664062, |
|
"logps/oppo_gen": -85.88131713867188, |
|
"logps/oppo_real": -249.00379943847656, |
|
"logps/real": -246.01797485351562, |
|
"loss": -1.8894, |
|
"loss/gen": 0.031073393300175667, |
|
"loss/real": -2.029858350753784, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -617.0215454101562, |
|
"rewards/margins": 620.0074462890625, |
|
"rewards/real": 2.9858341217041016, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 61.25214042816177, |
|
"learning_rate": 4.439964157706093e-07, |
|
"logits/generated": -0.981428325176239, |
|
"logits/oppo_generated": -2.7159523963928223, |
|
"logits/oppo_real": -2.960238218307495, |
|
"logits/real": -1.0548228025436401, |
|
"logps/generated": -598.4140625, |
|
"logps/oppo_gen": -58.635005950927734, |
|
"logps/oppo_real": -400.1387634277344, |
|
"logps/real": -425.5389404296875, |
|
"loss": -1.8947, |
|
"loss/gen": 0.1158125251531601, |
|
"loss/real": -1.7459979057312012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -539.779052734375, |
|
"rewards/margins": 514.37890625, |
|
"rewards/real": -25.400211334228516, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 81.0536118151514, |
|
"learning_rate": 4.4354838709677415e-07, |
|
"logits/generated": -1.4463560581207275, |
|
"logits/oppo_generated": -2.678307294845581, |
|
"logits/oppo_real": -2.8269057273864746, |
|
"logits/real": -1.380929708480835, |
|
"logps/generated": -846.7905883789062, |
|
"logps/oppo_gen": -67.97695922851562, |
|
"logps/oppo_real": -219.36227416992188, |
|
"logps/real": -325.5843505859375, |
|
"loss": -1.93, |
|
"loss/gen": 0.5336862206459045, |
|
"loss/real": -0.9377790689468384, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -778.8135986328125, |
|
"rewards/margins": 672.591552734375, |
|
"rewards/real": -106.22209167480469, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 66.29716039592118, |
|
"learning_rate": 4.4310035842293903e-07, |
|
"logits/generated": -2.113171100616455, |
|
"logits/oppo_generated": -2.955277442932129, |
|
"logits/oppo_real": -3.2049663066864014, |
|
"logits/real": -2.1531317234039307, |
|
"logps/generated": -731.0936279296875, |
|
"logps/oppo_gen": -87.20503234863281, |
|
"logps/oppo_real": -395.21441650390625, |
|
"logps/real": -363.5802001953125, |
|
"loss": -1.8696, |
|
"loss/gen": 0.11870712786912918, |
|
"loss/real": -2.3163421154022217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -643.8885498046875, |
|
"rewards/margins": 675.5227661132812, |
|
"rewards/real": 31.634204864501953, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 66.8398673778887, |
|
"learning_rate": 4.4265232974910396e-07, |
|
"logits/generated": -1.6186769008636475, |
|
"logits/oppo_generated": -2.8609347343444824, |
|
"logits/oppo_real": -2.865668773651123, |
|
"logits/real": -1.6237159967422485, |
|
"logps/generated": -770.720703125, |
|
"logps/oppo_gen": -97.59341430664062, |
|
"logps/oppo_real": -205.549560546875, |
|
"logps/real": -184.6165771484375, |
|
"loss": -1.8003, |
|
"loss/gen": 0.018834060057997704, |
|
"loss/real": -2.2093300819396973, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -673.1272583007812, |
|
"rewards/margins": 694.060302734375, |
|
"rewards/real": 20.9329833984375, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 54.69193731997889, |
|
"learning_rate": 4.422043010752688e-07, |
|
"logits/generated": -2.053788900375366, |
|
"logits/oppo_generated": -2.971303939819336, |
|
"logits/oppo_real": -3.011564016342163, |
|
"logits/real": -2.120933771133423, |
|
"logps/generated": -695.5224609375, |
|
"logps/oppo_gen": -89.73414611816406, |
|
"logps/oppo_real": -350.30523681640625, |
|
"logps/real": -328.1854248046875, |
|
"loss": -1.9518, |
|
"loss/gen": 0.09373271465301514, |
|
"loss/real": -2.2211976051330566, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -605.788330078125, |
|
"rewards/margins": 627.9080810546875, |
|
"rewards/real": 22.119773864746094, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 52.319817289914155, |
|
"learning_rate": 4.4175627240143367e-07, |
|
"logits/generated": -1.4142050743103027, |
|
"logits/oppo_generated": -2.840271472930908, |
|
"logits/oppo_real": -2.7079410552978516, |
|
"logits/real": -2.1080374717712402, |
|
"logps/generated": -555.2352905273438, |
|
"logps/oppo_gen": -53.288421630859375, |
|
"logps/oppo_real": -155.62603759765625, |
|
"logps/real": -192.53866577148438, |
|
"loss": -1.7625, |
|
"loss/gen": 0.1426987648010254, |
|
"loss/real": -1.6308739185333252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -501.94683837890625, |
|
"rewards/margins": 465.03424072265625, |
|
"rewards/real": -36.9126091003418, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 58.41200981497726, |
|
"learning_rate": 4.4130824372759855e-07, |
|
"logits/generated": -1.7520662546157837, |
|
"logits/oppo_generated": -3.0052433013916016, |
|
"logits/oppo_real": -2.8851370811462402, |
|
"logits/real": -2.3384604454040527, |
|
"logps/generated": -719.3140258789062, |
|
"logps/oppo_gen": -76.29854583740234, |
|
"logps/oppo_real": -393.45806884765625, |
|
"logps/real": -362.9105224609375, |
|
"loss": -1.877, |
|
"loss/gen": 0.030190223827958107, |
|
"loss/real": -2.3054752349853516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -643.0155029296875, |
|
"rewards/margins": 673.56298828125, |
|
"rewards/real": 30.547513961791992, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 48.523557624428676, |
|
"learning_rate": 4.4086021505376344e-07, |
|
"logits/generated": -1.3822331428527832, |
|
"logits/oppo_generated": -2.7603323459625244, |
|
"logits/oppo_real": -2.456112861633301, |
|
"logits/real": -2.125643253326416, |
|
"logps/generated": -608.9871826171875, |
|
"logps/oppo_gen": -69.90534210205078, |
|
"logps/oppo_real": -236.9437255859375, |
|
"logps/real": -226.87928771972656, |
|
"loss": -1.9436, |
|
"loss/gen": 0.6278254389762878, |
|
"loss/real": -2.100644111633301, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -539.081787109375, |
|
"rewards/margins": 549.146240234375, |
|
"rewards/real": 10.06441879272461, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 64.36034041215716, |
|
"learning_rate": 4.4041218637992826e-07, |
|
"logits/generated": -1.5218685865402222, |
|
"logits/oppo_generated": -2.696648597717285, |
|
"logits/oppo_real": -2.8764772415161133, |
|
"logits/real": -1.5772918462753296, |
|
"logps/generated": -611.6922607421875, |
|
"logps/oppo_gen": -87.64535522460938, |
|
"logps/oppo_real": -305.02203369140625, |
|
"logps/real": -342.9971618652344, |
|
"loss": -1.7637, |
|
"loss/gen": 0.49089139699935913, |
|
"loss/real": -1.620248556137085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -524.046875, |
|
"rewards/margins": 486.0717468261719, |
|
"rewards/real": -37.975135803222656, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 60.28151730111809, |
|
"learning_rate": 4.399641577060932e-07, |
|
"logits/generated": -1.8260202407836914, |
|
"logits/oppo_generated": -2.817328453063965, |
|
"logits/oppo_real": -2.961047649383545, |
|
"logits/real": -1.7334787845611572, |
|
"logps/generated": -653.2313842773438, |
|
"logps/oppo_gen": -62.92127990722656, |
|
"logps/oppo_real": -255.6164093017578, |
|
"logps/real": -285.0434265136719, |
|
"loss": -1.8254, |
|
"loss/gen": 0.058723676949739456, |
|
"loss/real": -1.7057299613952637, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -590.31005859375, |
|
"rewards/margins": 560.883056640625, |
|
"rewards/real": -29.427001953125, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 99.86272804768053, |
|
"learning_rate": 4.3951612903225803e-07, |
|
"logits/generated": -1.548148512840271, |
|
"logits/oppo_generated": -2.848414897918701, |
|
"logits/oppo_real": -2.944563388824463, |
|
"logits/real": -2.128657102584839, |
|
"logps/generated": -713.473876953125, |
|
"logps/oppo_gen": -95.9730453491211, |
|
"logps/oppo_real": -342.0549621582031, |
|
"logps/real": -321.3438720703125, |
|
"loss": -1.8176, |
|
"loss/gen": 0.12640155851840973, |
|
"loss/real": -2.20711088180542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -617.5008544921875, |
|
"rewards/margins": 638.2119140625, |
|
"rewards/real": 20.71107292175293, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 47.58197849747712, |
|
"learning_rate": 4.390681003584229e-07, |
|
"logits/generated": -1.8227300643920898, |
|
"logits/oppo_generated": -2.591665744781494, |
|
"logits/oppo_real": -2.58309268951416, |
|
"logits/real": -2.0603384971618652, |
|
"logps/generated": -735.041259765625, |
|
"logps/oppo_gen": -46.3786735534668, |
|
"logps/oppo_real": -55.683685302734375, |
|
"logps/real": -42.93335723876953, |
|
"loss": -1.9832, |
|
"loss/gen": 0.15213513374328613, |
|
"loss/real": -2.1275031566619873, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -688.66259765625, |
|
"rewards/margins": 701.4129028320312, |
|
"rewards/real": 12.75033187866211, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 85.49574242414793, |
|
"learning_rate": 4.386200716845878e-07, |
|
"logits/generated": -2.351722240447998, |
|
"logits/oppo_generated": -2.9608449935913086, |
|
"logits/oppo_real": -3.0993542671203613, |
|
"logits/real": -2.4843802452087402, |
|
"logps/generated": -788.0579833984375, |
|
"logps/oppo_gen": -151.79364013671875, |
|
"logps/oppo_real": -470.64190673828125, |
|
"logps/real": -452.0572814941406, |
|
"loss": -1.8285, |
|
"loss/gen": 0.014275267720222473, |
|
"loss/real": -2.1858463287353516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -636.2643432617188, |
|
"rewards/margins": 654.8489990234375, |
|
"rewards/real": 18.584644317626953, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 79.58229575021696, |
|
"learning_rate": 4.3817204301075267e-07, |
|
"logits/generated": -2.1911535263061523, |
|
"logits/oppo_generated": -2.955319404602051, |
|
"logits/oppo_real": -3.0618791580200195, |
|
"logits/real": -2.3935108184814453, |
|
"logps/generated": -570.6660766601562, |
|
"logps/oppo_gen": -117.42491149902344, |
|
"logps/oppo_real": -394.2685546875, |
|
"logps/real": -364.75994873046875, |
|
"loss": -1.9488, |
|
"loss/gen": 0.6023984551429749, |
|
"loss/real": -2.295085906982422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -453.24114990234375, |
|
"rewards/margins": 482.749755859375, |
|
"rewards/real": 29.50858497619629, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 70.38357171932063, |
|
"learning_rate": 4.377240143369175e-07, |
|
"logits/generated": -1.1671700477600098, |
|
"logits/oppo_generated": -2.867384433746338, |
|
"logits/oppo_real": -2.831949234008789, |
|
"logits/real": -1.4764811992645264, |
|
"logps/generated": -583.820068359375, |
|
"logps/oppo_gen": -63.369956970214844, |
|
"logps/oppo_real": -158.0244598388672, |
|
"logps/real": -278.0089416503906, |
|
"loss": -1.7578, |
|
"loss/gen": 0.7094503045082092, |
|
"loss/real": -0.8001553416252136, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -520.4500732421875, |
|
"rewards/margins": 400.4656066894531, |
|
"rewards/real": -119.98446655273438, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 74.56631119765144, |
|
"learning_rate": 4.3727598566308243e-07, |
|
"logits/generated": -1.9085487127304077, |
|
"logits/oppo_generated": -2.6366734504699707, |
|
"logits/oppo_real": -2.7779293060302734, |
|
"logits/real": -2.0540764331817627, |
|
"logps/generated": -625.862060546875, |
|
"logps/oppo_gen": -67.02287292480469, |
|
"logps/oppo_real": -384.6851806640625, |
|
"logps/real": -352.558349609375, |
|
"loss": -1.7415, |
|
"loss/gen": 0.05960576981306076, |
|
"loss/real": -2.3212685585021973, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -558.8392333984375, |
|
"rewards/margins": 590.966064453125, |
|
"rewards/real": 32.12684631347656, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 65.722717426606, |
|
"learning_rate": 4.368279569892473e-07, |
|
"logits/generated": -1.732559323310852, |
|
"logits/oppo_generated": -2.634063959121704, |
|
"logits/oppo_real": -2.9160523414611816, |
|
"logits/real": -1.951407551765442, |
|
"logps/generated": -677.4447631835938, |
|
"logps/oppo_gen": -60.14728546142578, |
|
"logps/oppo_real": -206.13426208496094, |
|
"logps/real": -179.4784393310547, |
|
"loss": -1.912, |
|
"loss/gen": 0.08394724130630493, |
|
"loss/real": -2.2665581703186035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -617.2974853515625, |
|
"rewards/margins": 643.953369140625, |
|
"rewards/real": 26.65583038330078, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 69.63255260876554, |
|
"learning_rate": 4.3637992831541214e-07, |
|
"logits/generated": -2.192533493041992, |
|
"logits/oppo_generated": -2.894730567932129, |
|
"logits/oppo_real": -3.0438637733459473, |
|
"logits/real": -2.3837125301361084, |
|
"logps/generated": -801.6678466796875, |
|
"logps/oppo_gen": -99.43392181396484, |
|
"logps/oppo_real": -454.5760192871094, |
|
"logps/real": -435.48876953125, |
|
"loss": -1.9334, |
|
"loss/gen": 0.007741004228591919, |
|
"loss/real": -2.1908724308013916, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -702.2339477539062, |
|
"rewards/margins": 721.3212280273438, |
|
"rewards/real": 19.087242126464844, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 92.43653861907097, |
|
"learning_rate": 4.359318996415771e-07, |
|
"logits/generated": -1.6997716426849365, |
|
"logits/oppo_generated": -2.791623115539551, |
|
"logits/oppo_real": -2.808715581893921, |
|
"logits/real": -1.985607624053955, |
|
"logps/generated": -561.043212890625, |
|
"logps/oppo_gen": -85.10957336425781, |
|
"logps/oppo_real": -352.60321044921875, |
|
"logps/real": -316.6192626953125, |
|
"loss": -1.8481, |
|
"loss/gen": 0.3760773539543152, |
|
"loss/real": -2.35983943939209, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -475.93359375, |
|
"rewards/margins": 511.9176025390625, |
|
"rewards/real": 35.98394775390625, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 55.39334883993858, |
|
"learning_rate": 4.354838709677419e-07, |
|
"logits/generated": -2.0199737548828125, |
|
"logits/oppo_generated": -2.7498302459716797, |
|
"logits/oppo_real": -3.0951387882232666, |
|
"logits/real": -2.1367290019989014, |
|
"logps/generated": -683.6644287109375, |
|
"logps/oppo_gen": -88.00972747802734, |
|
"logps/oppo_real": -444.2664794921875, |
|
"logps/real": -413.0567626953125, |
|
"loss": -1.9361, |
|
"loss/gen": 0.04308386147022247, |
|
"loss/real": -2.3120970726013184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -595.65478515625, |
|
"rewards/margins": 626.864501953125, |
|
"rewards/real": 31.209705352783203, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 91.63384592359965, |
|
"learning_rate": 4.350358422939068e-07, |
|
"logits/generated": -1.7172353267669678, |
|
"logits/oppo_generated": -2.704036235809326, |
|
"logits/oppo_real": -2.9259088039398193, |
|
"logits/real": -1.7819669246673584, |
|
"logps/generated": -828.3485717773438, |
|
"logps/oppo_gen": -92.61172485351562, |
|
"logps/oppo_real": -306.844970703125, |
|
"logps/real": -278.81842041015625, |
|
"loss": -1.8423, |
|
"loss/gen": 0.39729800820350647, |
|
"loss/real": -2.2802653312683105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -735.73681640625, |
|
"rewards/margins": 763.7633056640625, |
|
"rewards/real": 28.02651596069336, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 96.31854803366016, |
|
"learning_rate": 4.3458781362007167e-07, |
|
"logits/generated": -0.24818015098571777, |
|
"logits/oppo_generated": -2.877349853515625, |
|
"logits/oppo_real": -2.687556743621826, |
|
"logits/real": -1.999771237373352, |
|
"logps/generated": -567.3941650390625, |
|
"logps/oppo_gen": -45.4771728515625, |
|
"logps/oppo_real": -291.0182189941406, |
|
"logps/real": -295.58465576171875, |
|
"loss": -1.9309, |
|
"loss/gen": 0.07412834465503693, |
|
"loss/real": -1.9543354511260986, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -521.9170532226562, |
|
"rewards/margins": 517.3505859375, |
|
"rewards/real": -4.5664520263671875, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 55.42259963932809, |
|
"learning_rate": 4.3413978494623655e-07, |
|
"logits/generated": -1.5127736330032349, |
|
"logits/oppo_generated": -2.788801908493042, |
|
"logits/oppo_real": -2.9535598754882812, |
|
"logits/real": -1.855088233947754, |
|
"logps/generated": -925.1292114257812, |
|
"logps/oppo_gen": -66.30078125, |
|
"logps/oppo_real": -343.6934814453125, |
|
"logps/real": -336.6583557128906, |
|
"loss": -1.8923, |
|
"loss/gen": 0.016879774630069733, |
|
"loss/real": -2.0703513622283936, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -858.828369140625, |
|
"rewards/margins": 865.863525390625, |
|
"rewards/real": 7.035131454467773, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 104.95930666066492, |
|
"learning_rate": 4.336917562724014e-07, |
|
"logits/generated": -1.429623007774353, |
|
"logits/oppo_generated": -2.987077474594116, |
|
"logits/oppo_real": -2.9259276390075684, |
|
"logits/real": -2.400783061981201, |
|
"logps/generated": -575.1930541992188, |
|
"logps/oppo_gen": -55.12033462524414, |
|
"logps/oppo_real": -192.61981201171875, |
|
"logps/real": -160.576904296875, |
|
"loss": -1.8125, |
|
"loss/gen": 0.07752130925655365, |
|
"loss/real": -2.3204290866851807, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -520.07275390625, |
|
"rewards/margins": 552.1156005859375, |
|
"rewards/real": 32.04291534423828, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 97.62940130923675, |
|
"learning_rate": 4.332437275985663e-07, |
|
"logits/generated": -1.4225895404815674, |
|
"logits/oppo_generated": -2.717535972595215, |
|
"logits/oppo_real": -2.961338520050049, |
|
"logits/real": -2.149232864379883, |
|
"logps/generated": -739.74853515625, |
|
"logps/oppo_gen": -85.34925079345703, |
|
"logps/oppo_real": -546.5931396484375, |
|
"logps/real": -515.603759765625, |
|
"loss": -1.7931, |
|
"loss/gen": 0.017313145101070404, |
|
"loss/real": -2.309894323348999, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -654.3992919921875, |
|
"rewards/margins": 685.388671875, |
|
"rewards/real": 30.98943328857422, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 69.46462749238816, |
|
"learning_rate": 4.3279569892473114e-07, |
|
"logits/generated": -1.5553247928619385, |
|
"logits/oppo_generated": -2.8351736068725586, |
|
"logits/oppo_real": -2.9403247833251953, |
|
"logits/real": -1.9800009727478027, |
|
"logps/generated": -723.7879028320312, |
|
"logps/oppo_gen": -70.80876922607422, |
|
"logps/oppo_real": -184.90956115722656, |
|
"logps/real": -161.41104125976562, |
|
"loss": -1.9214, |
|
"loss/gen": 0.08144047111272812, |
|
"loss/real": -2.2349853515625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -652.9791259765625, |
|
"rewards/margins": 676.4776611328125, |
|
"rewards/real": 23.498506546020508, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 53.04196866117551, |
|
"learning_rate": 4.32347670250896e-07, |
|
"logits/generated": -1.5968117713928223, |
|
"logits/oppo_generated": -2.9442050457000732, |
|
"logits/oppo_real": -2.914801597595215, |
|
"logits/real": -2.385002374649048, |
|
"logps/generated": -731.5435791015625, |
|
"logps/oppo_gen": -89.47770690917969, |
|
"logps/oppo_real": -427.4810791015625, |
|
"logps/real": -419.70947265625, |
|
"loss": -1.9711, |
|
"loss/gen": 0.019157692790031433, |
|
"loss/real": -2.0777158737182617, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -642.06591796875, |
|
"rewards/margins": 649.8374633789062, |
|
"rewards/real": 7.771598815917969, |
|
"step": 191 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1146, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|