|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 4689, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.066098081023454e-09, |
|
"logits/generated": -0.6986645460128784, |
|
"logits/real": -0.9474660754203796, |
|
"logps/generated": -378.9501953125, |
|
"logps/real": -127.2445068359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0660980810234541e-08, |
|
"logits/generated": -0.7292745113372803, |
|
"logits/real": -0.9080196022987366, |
|
"logps/generated": -411.4975280761719, |
|
"logps/real": -136.8819122314453, |
|
"loss": 0.6994, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/generated": -0.00470572616904974, |
|
"rewards/margins": 0.0034854437690228224, |
|
"rewards/real": -0.0012202821671962738, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.1321961620469082e-08, |
|
"logits/generated": -0.7172996997833252, |
|
"logits/real": -0.8902201652526855, |
|
"logps/generated": -425.13238525390625, |
|
"logps/real": -146.6293182373047, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/generated": -0.04359797015786171, |
|
"rewards/margins": 0.047350525856018066, |
|
"rewards/real": 0.003752560820430517, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.1982942430703625e-08, |
|
"logits/generated": -0.7285000681877136, |
|
"logits/real": -0.9076566696166992, |
|
"logps/generated": -390.41241455078125, |
|
"logps/real": -124.2341537475586, |
|
"loss": 0.6261, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.1323629468679428, |
|
"rewards/margins": 0.12934455275535583, |
|
"rewards/real": -0.003018400864675641, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.2643923240938164e-08, |
|
"logits/generated": -0.7392301559448242, |
|
"logits/real": -0.8854039311408997, |
|
"logps/generated": -411.90313720703125, |
|
"logps/real": -156.40731811523438, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.507174551486969, |
|
"rewards/margins": 0.4850993752479553, |
|
"rewards/real": -0.022075189277529716, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.3304904051172704e-08, |
|
"logits/generated": -0.7073003053665161, |
|
"logits/real": -0.8917710185050964, |
|
"logps/generated": -438.045654296875, |
|
"logps/real": -150.26516723632812, |
|
"loss": 0.3997, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.9068630337715149, |
|
"rewards/margins": 0.8774474263191223, |
|
"rewards/real": -0.029415583238005638, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.396588486140725e-08, |
|
"logits/generated": -0.683810830116272, |
|
"logits/real": -0.8957662582397461, |
|
"logps/generated": -449.07598876953125, |
|
"logps/real": -147.19796752929688, |
|
"loss": 0.222, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.9475853443145752, |
|
"rewards/margins": 1.8720191717147827, |
|
"rewards/real": -0.07556610554456711, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.462686567164178e-08, |
|
"logits/generated": -0.6989277601242065, |
|
"logits/real": -0.826370894908905, |
|
"logps/generated": -423.2393493652344, |
|
"logps/real": -161.0751190185547, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.234829902648926, |
|
"rewards/margins": 2.123378038406372, |
|
"rewards/real": -0.1114521399140358, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.528784648187633e-08, |
|
"logits/generated": -0.7299633622169495, |
|
"logits/real": -0.8775332570075989, |
|
"logps/generated": -423.95709228515625, |
|
"logps/real": -147.74581909179688, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.318953037261963, |
|
"rewards/margins": 2.218860149383545, |
|
"rewards/real": -0.10009302943944931, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.594882729211087e-08, |
|
"logits/generated": -0.7246233224868774, |
|
"logits/real": -0.9104539752006531, |
|
"logps/generated": -413.73638916015625, |
|
"logps/real": -143.83090209960938, |
|
"loss": 0.1662, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.487647533416748, |
|
"rewards/margins": 2.347053289413452, |
|
"rewards/real": -0.1405942738056183, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0660980810234541e-07, |
|
"logits/generated": -0.7521112561225891, |
|
"logits/real": -0.8818603754043579, |
|
"logps/generated": -452.70709228515625, |
|
"logps/real": -162.36923217773438, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.276683807373047, |
|
"rewards/margins": 3.1224722862243652, |
|
"rewards/real": -0.15421171486377716, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1727078891257995e-07, |
|
"logits/generated": -0.7396367788314819, |
|
"logits/real": -0.8340644836425781, |
|
"logps/generated": -441.98297119140625, |
|
"logps/real": -156.94149780273438, |
|
"loss": 0.1072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.906806230545044, |
|
"rewards/margins": 3.7491652965545654, |
|
"rewards/real": -0.15764120221138, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.279317697228145e-07, |
|
"logits/generated": -0.7582974433898926, |
|
"logits/real": -0.9137656092643738, |
|
"logps/generated": -457.762451171875, |
|
"logps/real": -158.60690307617188, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.984735012054443, |
|
"rewards/margins": 4.785731315612793, |
|
"rewards/real": -0.19900405406951904, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3859275053304903e-07, |
|
"logits/generated": -0.7004902958869934, |
|
"logits/real": -0.8594983220100403, |
|
"logps/generated": -452.61187744140625, |
|
"logps/real": -145.72874450683594, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.79620361328125, |
|
"rewards/margins": 5.550940036773682, |
|
"rewards/real": -0.24526312947273254, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4925373134328355e-07, |
|
"logits/generated": -0.7274501919746399, |
|
"logits/real": -0.9405637979507446, |
|
"logps/generated": -462.9717712402344, |
|
"logps/real": -147.3827667236328, |
|
"loss": 0.0653, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.3193254470825195, |
|
"rewards/margins": 6.078797340393066, |
|
"rewards/real": -0.2405281811952591, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.5991471215351813e-07, |
|
"logits/generated": -0.7242128849029541, |
|
"logits/real": -0.9140293002128601, |
|
"logps/generated": -474.15216064453125, |
|
"logps/real": -157.3037567138672, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.953827857971191, |
|
"rewards/margins": 6.571684837341309, |
|
"rewards/real": -0.38214248418807983, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7057569296375266e-07, |
|
"logits/generated": -0.7147258520126343, |
|
"logits/real": -0.8815923929214478, |
|
"logps/generated": -457.26519775390625, |
|
"logps/real": -142.1988067626953, |
|
"loss": 0.044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.94360876083374, |
|
"rewards/margins": 6.643794059753418, |
|
"rewards/real": -0.29981470108032227, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8123667377398718e-07, |
|
"logits/generated": -0.7313283085823059, |
|
"logits/real": -0.8958312273025513, |
|
"logps/generated": -466.23126220703125, |
|
"logps/real": -156.481201171875, |
|
"loss": 0.0421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.367417812347412, |
|
"rewards/margins": 7.1352057456970215, |
|
"rewards/real": -0.23221150040626526, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9189765458422174e-07, |
|
"logits/generated": -0.7426391839981079, |
|
"logits/real": -0.8894122838973999, |
|
"logps/generated": -474.2499084472656, |
|
"logps/real": -148.93968200683594, |
|
"loss": 0.045, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.244542121887207, |
|
"rewards/margins": 6.9434709548950195, |
|
"rewards/real": -0.30106985569000244, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.025586353944563e-07, |
|
"logits/generated": -0.7155178785324097, |
|
"logits/real": -0.8063043355941772, |
|
"logps/generated": -488.8700256347656, |
|
"logps/real": -178.33253479003906, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.651026248931885, |
|
"rewards/margins": 7.362033843994141, |
|
"rewards/real": -0.28899192810058594, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.1321961620469082e-07, |
|
"logits/generated": -0.7116974592208862, |
|
"logits/real": -0.8972026705741882, |
|
"logps/generated": -475.73089599609375, |
|
"logps/real": -143.96710205078125, |
|
"loss": 0.044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.454703330993652, |
|
"rewards/margins": 8.173591613769531, |
|
"rewards/real": -0.28111228346824646, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2388059701492537e-07, |
|
"logits/generated": -0.6575301885604858, |
|
"logits/real": -0.8355759382247925, |
|
"logps/generated": -517.0919799804688, |
|
"logps/real": -158.15267944335938, |
|
"loss": 0.0261, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -8.714963912963867, |
|
"rewards/margins": 8.384611129760742, |
|
"rewards/real": -0.3303532600402832, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.345415778251599e-07, |
|
"logits/generated": -0.6825748085975647, |
|
"logits/real": -0.9428423643112183, |
|
"logps/generated": -481.72381591796875, |
|
"logps/real": -139.2545166015625, |
|
"loss": 0.0312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.92175817489624, |
|
"rewards/margins": 7.7295966148376465, |
|
"rewards/real": -0.19216081500053406, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.452025586353944e-07, |
|
"logits/generated": -0.7176781892776489, |
|
"logits/real": -0.8804994821548462, |
|
"logps/generated": -505.34423828125, |
|
"logps/real": -167.8238983154297, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.55711555480957, |
|
"rewards/margins": 9.215084075927734, |
|
"rewards/real": -0.34203046560287476, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.55863539445629e-07, |
|
"logits/generated": -0.7456918358802795, |
|
"logits/real": -0.9094620943069458, |
|
"logps/generated": -521.493408203125, |
|
"logps/real": -168.6183319091797, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.619193077087402, |
|
"rewards/margins": 9.36630916595459, |
|
"rewards/real": -0.2528838515281677, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.665245202558635e-07, |
|
"logits/generated": -0.7071075439453125, |
|
"logits/real": -0.9580531120300293, |
|
"logps/generated": -492.2247009277344, |
|
"logps/real": -122.14371490478516, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.554067611694336, |
|
"rewards/margins": 9.357695579528809, |
|
"rewards/real": -0.19637097418308258, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7718550106609805e-07, |
|
"logits/generated": -0.6865926384925842, |
|
"logits/real": -0.9246328473091125, |
|
"logps/generated": -499.01458740234375, |
|
"logps/real": -112.85029602050781, |
|
"loss": 0.0302, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.010147094726562, |
|
"rewards/margins": 8.914213180541992, |
|
"rewards/real": -0.0959334522485733, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.878464818763326e-07, |
|
"logits/generated": -0.7495471239089966, |
|
"logits/real": -0.9227398037910461, |
|
"logps/generated": -492.08404541015625, |
|
"logps/real": -152.34420776367188, |
|
"loss": 0.0193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.229729652404785, |
|
"rewards/margins": 9.07396125793457, |
|
"rewards/real": -0.15576975047588348, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.985074626865671e-07, |
|
"logits/generated": -0.7224324941635132, |
|
"logits/real": -0.9130092859268188, |
|
"logps/generated": -490.20587158203125, |
|
"logps/real": -147.02711486816406, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.046536445617676, |
|
"rewards/margins": 8.867794036865234, |
|
"rewards/real": -0.17874157428741455, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0916844349680174e-07, |
|
"logits/generated": -0.6551756262779236, |
|
"logits/real": -0.856887698173523, |
|
"logps/generated": -494.3564453125, |
|
"logps/real": -127.67320251464844, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.807455062866211, |
|
"rewards/margins": 9.784029960632324, |
|
"rewards/real": -0.02342619001865387, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1982942430703626e-07, |
|
"logits/generated": -0.7393316030502319, |
|
"logits/real": -0.8498824238777161, |
|
"logps/generated": -507.4498596191406, |
|
"logps/real": -155.37051391601562, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.729459762573242, |
|
"rewards/margins": 10.669529914855957, |
|
"rewards/real": -0.05992986634373665, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.304904051172708e-07, |
|
"logits/generated": -0.6794149875640869, |
|
"logits/real": -0.8570221066474915, |
|
"logps/generated": -516.3793334960938, |
|
"logps/real": -147.1600799560547, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.062250137329102, |
|
"rewards/margins": 10.987607955932617, |
|
"rewards/real": -0.07464051991701126, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.411513859275053e-07, |
|
"logits/generated": -0.7035683393478394, |
|
"logits/real": -0.8863167762756348, |
|
"logps/generated": -511.3915100097656, |
|
"logps/real": -155.48513793945312, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.503643989562988, |
|
"rewards/margins": 10.356426239013672, |
|
"rewards/real": -0.1472179889678955, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.5181236673773984e-07, |
|
"logits/generated": -0.6560925245285034, |
|
"logits/real": -0.8542153239250183, |
|
"logps/generated": -549.3557739257812, |
|
"logps/real": -134.815185546875, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.663198471069336, |
|
"rewards/margins": 13.555437088012695, |
|
"rewards/real": -0.10776337236166, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.6247334754797437e-07, |
|
"logits/generated": -0.6811632513999939, |
|
"logits/real": -0.9368340373039246, |
|
"logps/generated": -537.3572387695312, |
|
"logps/real": -134.58053588867188, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.095390319824219, |
|
"rewards/margins": 12.888254165649414, |
|
"rewards/real": -0.20713606476783752, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.7313432835820895e-07, |
|
"logits/generated": -0.7272433042526245, |
|
"logits/real": -0.9455550909042358, |
|
"logps/generated": -553.9634399414062, |
|
"logps/real": -131.4666748046875, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.890289306640625, |
|
"rewards/margins": 13.677050590515137, |
|
"rewards/real": -0.21323621273040771, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8379530916844347e-07, |
|
"logits/generated": -0.7156924605369568, |
|
"logits/real": -0.8684523701667786, |
|
"logps/generated": -536.7255859375, |
|
"logps/real": -173.733154296875, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.108613967895508, |
|
"rewards/margins": 12.777644157409668, |
|
"rewards/real": -0.33096957206726074, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9445628997867805e-07, |
|
"logits/generated": -0.6920727491378784, |
|
"logits/real": -0.851031482219696, |
|
"logps/generated": -580.4476318359375, |
|
"logps/real": -153.07241821289062, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.45750617980957, |
|
"rewards/margins": 14.292282104492188, |
|
"rewards/real": -0.16522422432899475, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.051172707889126e-07, |
|
"logits/generated": -0.7242365479469299, |
|
"logits/real": -0.9248291254043579, |
|
"logps/generated": -566.2769775390625, |
|
"logps/real": -136.8095245361328, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.296048164367676, |
|
"rewards/margins": 14.1116361618042, |
|
"rewards/real": -0.18441154062747955, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.157782515991471e-07, |
|
"logits/generated": -0.7522596120834351, |
|
"logits/real": -0.9050644636154175, |
|
"logps/generated": -568.7861328125, |
|
"logps/real": -143.46664428710938, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.536224365234375, |
|
"rewards/margins": 16.37632942199707, |
|
"rewards/real": -0.15989510715007782, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2643923240938163e-07, |
|
"logits/generated": -0.7549287676811218, |
|
"logits/real": -0.944291889667511, |
|
"logps/generated": -558.4595947265625, |
|
"logps/real": -138.44850158691406, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.336772918701172, |
|
"rewards/margins": 16.19559669494629, |
|
"rewards/real": -0.14117594063282013, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.371002132196162e-07, |
|
"logits/generated": -0.7194818258285522, |
|
"logits/real": -0.9151653051376343, |
|
"logps/generated": -603.3551025390625, |
|
"logps/real": -128.14102172851562, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.891334533691406, |
|
"rewards/margins": 17.594852447509766, |
|
"rewards/real": -0.2964830994606018, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4776119402985074e-07, |
|
"logits/generated": -0.7237090468406677, |
|
"logits/real": -0.876343846321106, |
|
"logps/generated": -554.85302734375, |
|
"logps/real": -144.67776489257812, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.776510238647461, |
|
"rewards/margins": 15.578184127807617, |
|
"rewards/real": -0.19832463562488556, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5842217484008526e-07, |
|
"logits/generated": -0.6880273818969727, |
|
"logits/real": -0.8874770402908325, |
|
"logps/generated": -574.5953369140625, |
|
"logps/real": -144.68075561523438, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.195022583007812, |
|
"rewards/margins": 16.028963088989258, |
|
"rewards/real": -0.16606178879737854, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.690831556503198e-07, |
|
"logits/generated": -0.6841549873352051, |
|
"logits/real": -0.8916375041007996, |
|
"logps/generated": -601.9527587890625, |
|
"logps/real": -154.18507385253906, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.76523208618164, |
|
"rewards/margins": 17.5986270904541, |
|
"rewards/real": -0.16660475730895996, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.797441364605543e-07, |
|
"logits/generated": -0.6988117694854736, |
|
"logits/real": -0.8081305623054504, |
|
"logps/generated": -610.0335693359375, |
|
"logps/real": -151.32000732421875, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.002681732177734, |
|
"rewards/margins": 18.782581329345703, |
|
"rewards/real": -0.22010159492492676, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.904051172707888e-07, |
|
"logits/generated": -0.696639895439148, |
|
"logits/real": -0.9278604388237, |
|
"logps/generated": -585.9586791992188, |
|
"logps/real": -161.8017120361328, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.64035415649414, |
|
"rewards/margins": 17.357894897460938, |
|
"rewards/real": -0.282459557056427, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.998815165876776e-07, |
|
"logits/generated": -0.6872554421424866, |
|
"logits/real": -0.9127834439277649, |
|
"logps/generated": -568.6585693359375, |
|
"logps/real": -129.33038330078125, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.379060745239258, |
|
"rewards/margins": 17.275171279907227, |
|
"rewards/real": -0.10388918966054916, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.98696682464455e-07, |
|
"logits/generated": -0.699679970741272, |
|
"logits/real": -0.8975842595100403, |
|
"logps/generated": -584.9615478515625, |
|
"logps/real": -152.40818786621094, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.11277961730957, |
|
"rewards/margins": 17.817615509033203, |
|
"rewards/real": -0.2951619029045105, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.975118483412322e-07, |
|
"logits/generated": -0.7286016941070557, |
|
"logits/real": -0.8225492238998413, |
|
"logps/generated": -618.4642333984375, |
|
"logps/real": -168.58460998535156, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.85270118713379, |
|
"rewards/margins": 19.50424575805664, |
|
"rewards/real": -0.34845709800720215, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963270142180094e-07, |
|
"logits/generated": -0.7258303761482239, |
|
"logits/real": -0.9152861833572388, |
|
"logps/generated": -578.2322387695312, |
|
"logps/real": -137.53619384765625, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.372339248657227, |
|
"rewards/margins": 19.179141998291016, |
|
"rewards/real": -0.19319558143615723, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.951421800947867e-07, |
|
"logits/generated": -0.7013474702835083, |
|
"logits/real": -0.8657256960868835, |
|
"logps/generated": -624.0083618164062, |
|
"logps/real": -149.85691833496094, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.289051055908203, |
|
"rewards/margins": 21.97234344482422, |
|
"rewards/real": -0.3167068660259247, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.93957345971564e-07, |
|
"logits/generated": -0.7635418772697449, |
|
"logits/real": -0.868754506111145, |
|
"logps/generated": -628.0731201171875, |
|
"logps/real": -171.21641540527344, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.14756965637207, |
|
"rewards/margins": 21.83392333984375, |
|
"rewards/real": -0.31364530324935913, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.927725118483413e-07, |
|
"logits/generated": -0.7472074031829834, |
|
"logits/real": -0.9306868314743042, |
|
"logps/generated": -664.8667602539062, |
|
"logps/real": -160.69815063476562, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.677719116210938, |
|
"rewards/margins": 25.49850082397461, |
|
"rewards/real": -0.17921803891658783, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.915876777251184e-07, |
|
"logits/generated": -0.6238476037979126, |
|
"logits/real": -0.78472900390625, |
|
"logps/generated": -606.1143798828125, |
|
"logps/real": -158.46510314941406, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.29648208618164, |
|
"rewards/margins": 19.908735275268555, |
|
"rewards/real": -0.3877467215061188, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.904028436018957e-07, |
|
"logits/generated": -0.6451541185379028, |
|
"logits/real": -0.8735024333000183, |
|
"logps/generated": -645.0818481445312, |
|
"logps/real": -131.34632873535156, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.540191650390625, |
|
"rewards/margins": 22.381816864013672, |
|
"rewards/real": -0.15837618708610535, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.892180094786729e-07, |
|
"logits/generated": -0.6812300682067871, |
|
"logits/real": -0.8363407850265503, |
|
"logps/generated": -600.5889282226562, |
|
"logps/real": -161.74234008789062, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.389225006103516, |
|
"rewards/margins": 21.255290985107422, |
|
"rewards/real": -0.133933424949646, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.880331753554502e-07, |
|
"logits/generated": -0.6616766452789307, |
|
"logits/real": -0.8058059811592102, |
|
"logps/generated": -581.693359375, |
|
"logps/real": -169.41537475585938, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.568174362182617, |
|
"rewards/margins": 19.344226837158203, |
|
"rewards/real": -0.22394871711730957, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.868483412322275e-07, |
|
"logits/generated": -0.6738962531089783, |
|
"logits/real": -0.8422471880912781, |
|
"logps/generated": -610.7338256835938, |
|
"logps/real": -153.44923400878906, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.05523109436035, |
|
"rewards/margins": 19.948062896728516, |
|
"rewards/real": -0.1071687787771225, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.856635071090047e-07, |
|
"logits/generated": -0.653414785861969, |
|
"logits/real": -0.9212865829467773, |
|
"logps/generated": -636.19677734375, |
|
"logps/real": -136.12069702148438, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.247209548950195, |
|
"rewards/margins": 23.25400161743164, |
|
"rewards/real": 0.006791981868445873, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84478672985782e-07, |
|
"logits/generated": -0.652206301689148, |
|
"logits/real": -0.8476254343986511, |
|
"logps/generated": -619.7491455078125, |
|
"logps/real": -142.06788635253906, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.76266860961914, |
|
"rewards/margins": 21.641902923583984, |
|
"rewards/real": -0.12076608836650848, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.832938388625591e-07, |
|
"logits/generated": -0.6429646015167236, |
|
"logits/real": -0.8978961706161499, |
|
"logps/generated": -618.6793823242188, |
|
"logps/real": -125.8365707397461, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.18993377685547, |
|
"rewards/margins": 22.008413314819336, |
|
"rewards/real": -0.18152059614658356, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.821090047393365e-07, |
|
"logits/generated": -0.6130845546722412, |
|
"logits/real": -0.8363273739814758, |
|
"logps/generated": -597.8096923828125, |
|
"logps/real": -160.01922607421875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.191875457763672, |
|
"rewards/margins": 19.777849197387695, |
|
"rewards/real": -0.4140281081199646, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.809241706161137e-07, |
|
"logits/generated": -0.618815541267395, |
|
"logits/real": -0.7665129899978638, |
|
"logps/generated": -627.1131591796875, |
|
"logps/real": -141.24853515625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.848682403564453, |
|
"rewards/margins": 21.604549407958984, |
|
"rewards/real": -0.24413225054740906, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.79739336492891e-07, |
|
"logits/generated": -0.6469287872314453, |
|
"logits/real": -0.7974787354469299, |
|
"logps/generated": -646.8034057617188, |
|
"logps/real": -140.12033081054688, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.727245330810547, |
|
"rewards/margins": 24.623071670532227, |
|
"rewards/real": -0.10417119413614273, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.785545023696682e-07, |
|
"logits/generated": -0.6693117618560791, |
|
"logits/real": -0.8058202862739563, |
|
"logps/generated": -636.232421875, |
|
"logps/real": -162.97914123535156, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.655866622924805, |
|
"rewards/margins": 24.45601463317871, |
|
"rewards/real": -0.19985152781009674, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.773696682464455e-07, |
|
"logits/generated": -0.6164982914924622, |
|
"logits/real": -0.7986790537834167, |
|
"logps/generated": -607.1170654296875, |
|
"logps/real": -139.31671142578125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.806074142456055, |
|
"rewards/margins": 20.69213104248047, |
|
"rewards/real": -0.11394244432449341, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.7618483412322273e-07, |
|
"logits/generated": -0.6816304922103882, |
|
"logits/real": -0.7648627161979675, |
|
"logps/generated": -647.4364013671875, |
|
"logps/real": -150.38284301757812, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.9957275390625, |
|
"rewards/margins": 25.87929344177246, |
|
"rewards/real": -0.11643538624048233, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7499999999999995e-07, |
|
"logits/generated": -0.6420483589172363, |
|
"logits/real": -0.8686118125915527, |
|
"logps/generated": -686.556640625, |
|
"logps/real": -170.68507385253906, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.79473876953125, |
|
"rewards/margins": 26.400888442993164, |
|
"rewards/real": -0.3938508927822113, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.738151658767772e-07, |
|
"logits/generated": -0.6229578852653503, |
|
"logits/real": -0.7552638649940491, |
|
"logps/generated": -617.7360229492188, |
|
"logps/real": -133.21524047851562, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.75196075439453, |
|
"rewards/margins": 23.369274139404297, |
|
"rewards/real": -0.38268691301345825, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.726303317535545e-07, |
|
"logits/generated": -0.6239826679229736, |
|
"logits/real": -0.8113874197006226, |
|
"logps/generated": -617.291748046875, |
|
"logps/real": -171.48641967773438, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.23967742919922, |
|
"rewards/margins": 20.85289764404297, |
|
"rewards/real": -0.3867819309234619, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7144549763033177e-07, |
|
"logits/generated": -0.5856727361679077, |
|
"logits/real": -0.7748730182647705, |
|
"logps/generated": -646.7052612304688, |
|
"logps/real": -193.67135620117188, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.783584594726562, |
|
"rewards/margins": 23.435470581054688, |
|
"rewards/real": -0.34811311960220337, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.70260663507109e-07, |
|
"logits/generated": -0.6293947100639343, |
|
"logits/real": -0.8080043792724609, |
|
"logps/generated": -639.7860107421875, |
|
"logps/real": -146.7048797607422, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.628271102905273, |
|
"rewards/margins": 23.445405960083008, |
|
"rewards/real": -0.1828646957874298, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.690758293838862e-07, |
|
"logits/generated": -0.615898609161377, |
|
"logits/real": -0.7723320722579956, |
|
"logps/generated": -677.4393310546875, |
|
"logps/real": -144.49502563476562, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.529178619384766, |
|
"rewards/margins": 26.3712158203125, |
|
"rewards/real": -0.1579606533050537, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.678909952606635e-07, |
|
"logits/generated": -0.5974934697151184, |
|
"logits/real": -0.7126566767692566, |
|
"logps/generated": -625.4193115234375, |
|
"logps/real": -168.69549560546875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.015254974365234, |
|
"rewards/margins": 20.59768295288086, |
|
"rewards/real": -0.4175707697868347, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.667061611374407e-07, |
|
"logits/generated": -0.5658475756645203, |
|
"logits/real": -0.7219498157501221, |
|
"logps/generated": -662.0862426757812, |
|
"logps/real": -161.44467163085938, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.643749237060547, |
|
"rewards/margins": 24.445858001708984, |
|
"rewards/real": -0.1978892832994461, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.65521327014218e-07, |
|
"logits/generated": -0.5992667078971863, |
|
"logits/real": -0.834603488445282, |
|
"logps/generated": -645.3001708984375, |
|
"logps/real": -129.46719360351562, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.823612213134766, |
|
"rewards/margins": 22.714466094970703, |
|
"rewards/real": -0.10914424806833267, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6433649289099525e-07, |
|
"logits/generated": -0.6031894087791443, |
|
"logits/real": -0.8013744354248047, |
|
"logps/generated": -621.9285278320312, |
|
"logps/real": -142.18630981445312, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.36435317993164, |
|
"rewards/margins": 23.20974349975586, |
|
"rewards/real": -0.15460748970508575, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.631516587677725e-07, |
|
"logits/generated": -0.6474970579147339, |
|
"logits/real": -0.7969701290130615, |
|
"logps/generated": -695.6294555664062, |
|
"logps/real": -168.54324340820312, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.67018699645996, |
|
"rewards/margins": 27.486658096313477, |
|
"rewards/real": -0.1835293024778366, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6196682464454974e-07, |
|
"logits/generated": -0.5945593118667603, |
|
"logits/real": -0.8760132789611816, |
|
"logps/generated": -632.8075561523438, |
|
"logps/real": -137.9114532470703, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.00465965270996, |
|
"rewards/margins": 23.872516632080078, |
|
"rewards/real": -0.13214412331581116, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.60781990521327e-07, |
|
"logits/generated": -0.6659427285194397, |
|
"logits/real": -0.7805012464523315, |
|
"logps/generated": -676.3375244140625, |
|
"logps/real": -144.7049102783203, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.78857421875, |
|
"rewards/margins": 25.562801361083984, |
|
"rewards/real": -0.22577252984046936, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5959715639810423e-07, |
|
"logits/generated": -0.5976084470748901, |
|
"logits/real": -0.7444257140159607, |
|
"logps/generated": -604.505615234375, |
|
"logps/real": -165.31021118164062, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.380346298217773, |
|
"rewards/margins": 21.084182739257812, |
|
"rewards/real": -0.2961658239364624, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5841232227488145e-07, |
|
"logits/generated": -0.6201892495155334, |
|
"logits/real": -0.7714813351631165, |
|
"logps/generated": -661.017822265625, |
|
"logps/real": -157.14991760253906, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.09454345703125, |
|
"rewards/margins": 24.870752334594727, |
|
"rewards/real": -0.22379302978515625, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5722748815165873e-07, |
|
"logits/generated": -0.6445611715316772, |
|
"logits/real": -0.8067296147346497, |
|
"logps/generated": -723.9563598632812, |
|
"logps/real": -160.5393829345703, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.98370361328125, |
|
"rewards/margins": 30.44954490661621, |
|
"rewards/real": -0.5341606736183167, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.56042654028436e-07, |
|
"logits/generated": -0.6212409734725952, |
|
"logits/real": -0.8002877235412598, |
|
"logps/generated": -664.5400390625, |
|
"logps/real": -154.18406677246094, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.307668685913086, |
|
"rewards/margins": 25.93606185913086, |
|
"rewards/real": -0.37160566449165344, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5485781990521327e-07, |
|
"logits/generated": -0.6696589589118958, |
|
"logits/real": -0.872015655040741, |
|
"logps/generated": -713.975830078125, |
|
"logps/real": -128.1663055419922, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.322790145874023, |
|
"rewards/margins": 28.88034439086914, |
|
"rewards/real": -0.44244661927223206, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.536729857819905e-07, |
|
"logits/generated": -0.6352511644363403, |
|
"logits/real": -0.8168119192123413, |
|
"logps/generated": -690.1238403320312, |
|
"logps/real": -147.59390258789062, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.160791397094727, |
|
"rewards/margins": 28.624774932861328, |
|
"rewards/real": -0.5360159873962402, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5248815165876776e-07, |
|
"logits/generated": -0.6255658268928528, |
|
"logits/real": -0.7953276634216309, |
|
"logps/generated": -707.5949096679688, |
|
"logps/real": -164.2091827392578, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.47397232055664, |
|
"rewards/margins": 29.822368621826172, |
|
"rewards/real": -0.6516034007072449, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5130331753554504e-07, |
|
"logits/generated": -0.6212276816368103, |
|
"logits/real": -0.7597033381462097, |
|
"logps/generated": -692.6005859375, |
|
"logps/real": -188.38082885742188, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.789087295532227, |
|
"rewards/margins": 30.043895721435547, |
|
"rewards/real": -0.7451905608177185, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5011848341232226e-07, |
|
"logits/generated": -0.6456987261772156, |
|
"logits/real": -0.8051185607910156, |
|
"logps/generated": -692.8140258789062, |
|
"logps/real": -150.5452880859375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.668848037719727, |
|
"rewards/margins": 30.199214935302734, |
|
"rewards/real": -0.46962958574295044, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.489336492890995e-07, |
|
"logits/generated": -0.6712831258773804, |
|
"logits/real": -0.826252281665802, |
|
"logps/generated": -746.2249145507812, |
|
"logps/real": -135.2972412109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.030811309814453, |
|
"rewards/margins": 30.67293930053711, |
|
"rewards/real": -0.3578687012195587, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4774881516587675e-07, |
|
"logits/generated": -0.6518301367759705, |
|
"logits/real": -0.8644415736198425, |
|
"logps/generated": -690.9013671875, |
|
"logps/real": -161.36314392089844, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.964740753173828, |
|
"rewards/margins": 28.42722511291504, |
|
"rewards/real": -0.5375159978866577, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.46563981042654e-07, |
|
"logits/generated": -0.6160604953765869, |
|
"logits/real": -0.8334490060806274, |
|
"logps/generated": -717.3743286132812, |
|
"logps/real": -132.34591674804688, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.668527603149414, |
|
"rewards/margins": 28.09389877319336, |
|
"rewards/real": -0.5746307969093323, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4537914691943124e-07, |
|
"logits/generated": -0.6645776033401489, |
|
"logits/real": -0.749662458896637, |
|
"logps/generated": -705.51708984375, |
|
"logps/real": -170.75979614257812, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.325271606445312, |
|
"rewards/margins": 29.850238800048828, |
|
"rewards/real": -0.4750315248966217, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.441943127962085e-07, |
|
"logits/generated": -0.5848880410194397, |
|
"logits/real": -0.7599430084228516, |
|
"logps/generated": -679.7612915039062, |
|
"logps/real": -162.4516143798828, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.77053451538086, |
|
"rewards/margins": 28.397497177124023, |
|
"rewards/real": -0.37303638458251953, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.430094786729858e-07, |
|
"logits/generated": -0.6037660837173462, |
|
"logits/real": -0.7843543887138367, |
|
"logps/generated": -699.0863037109375, |
|
"logps/real": -141.5878143310547, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.310083389282227, |
|
"rewards/margins": 29.800827026367188, |
|
"rewards/real": -0.5092543363571167, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4182464454976306e-07, |
|
"logits/generated": -0.6485855579376221, |
|
"logits/real": -0.7831935882568359, |
|
"logps/generated": -758.2669677734375, |
|
"logps/real": -161.2501220703125, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.414588928222656, |
|
"rewards/margins": 34.92060852050781, |
|
"rewards/real": -0.49398383498191833, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4063981042654023e-07, |
|
"logits/generated": -0.6142803430557251, |
|
"logits/real": -0.8085862398147583, |
|
"logps/generated": -758.5242919921875, |
|
"logps/real": -143.28065490722656, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.11386489868164, |
|
"rewards/margins": 34.51411056518555, |
|
"rewards/real": -0.5997532606124878, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.394549763033175e-07, |
|
"logits/generated": -0.6144439578056335, |
|
"logits/real": -0.7795756459236145, |
|
"logps/generated": -712.9147338867188, |
|
"logps/real": -147.69723510742188, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.72540283203125, |
|
"rewards/margins": 32.39826202392578, |
|
"rewards/real": -0.3271421492099762, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.382701421800948e-07, |
|
"logits/generated": -0.6217916011810303, |
|
"logits/real": -0.7831851840019226, |
|
"logps/generated": -756.0094604492188, |
|
"logps/real": -150.90347290039062, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.669349670410156, |
|
"rewards/margins": 36.12870788574219, |
|
"rewards/real": -0.5406419634819031, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.37085308056872e-07, |
|
"logits/generated": -0.5746406316757202, |
|
"logits/real": -0.7443927526473999, |
|
"logps/generated": -721.3018798828125, |
|
"logps/real": -167.23497009277344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.08510971069336, |
|
"rewards/margins": 32.51539993286133, |
|
"rewards/real": -0.5697122812271118, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3590047393364927e-07, |
|
"logits/generated": -0.6204794645309448, |
|
"logits/real": -0.8241082429885864, |
|
"logps/generated": -771.1573486328125, |
|
"logps/real": -142.06658935546875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.35809326171875, |
|
"rewards/margins": 35.951690673828125, |
|
"rewards/real": -0.40640267729759216, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3471563981042654e-07, |
|
"logits/generated": -0.6459885239601135, |
|
"logits/real": -0.7648425698280334, |
|
"logps/generated": -728.7994384765625, |
|
"logps/real": -142.18283081054688, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.09253692626953, |
|
"rewards/margins": 34.53176498413086, |
|
"rewards/real": -0.5607694387435913, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.335308056872038e-07, |
|
"logits/generated": -0.6198188066482544, |
|
"logits/real": -0.8037668466567993, |
|
"logps/generated": -784.0115966796875, |
|
"logps/real": -148.4331817626953, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.82765579223633, |
|
"rewards/margins": 36.30569839477539, |
|
"rewards/real": -0.5219635367393494, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.32345971563981e-07, |
|
"logits/generated": -0.5650381445884705, |
|
"logits/real": -0.7140682339668274, |
|
"logps/generated": -789.3671875, |
|
"logps/real": -162.1250762939453, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.974632263183594, |
|
"rewards/margins": 37.51204299926758, |
|
"rewards/real": -0.46258825063705444, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.3116113744075825e-07, |
|
"logits/generated": -0.5569009780883789, |
|
"logits/real": -0.6691209077835083, |
|
"logps/generated": -727.1697387695312, |
|
"logps/real": -149.501953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.12350082397461, |
|
"rewards/margins": 31.786418914794922, |
|
"rewards/real": -0.3370811939239502, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.299763033175355e-07, |
|
"logits/generated": -0.6008241772651672, |
|
"logits/real": -0.7835357785224915, |
|
"logps/generated": -766.7589111328125, |
|
"logps/real": -131.9623260498047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.20771026611328, |
|
"rewards/margins": 36.767494201660156, |
|
"rewards/real": -0.4402230381965637, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2879146919431274e-07, |
|
"logits/generated": -0.5574159622192383, |
|
"logits/real": -0.7532224059104919, |
|
"logps/generated": -766.7594604492188, |
|
"logps/real": -175.73806762695312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.75395965576172, |
|
"rewards/margins": 34.18846130371094, |
|
"rewards/real": -0.565497875213623, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2760663507109e-07, |
|
"logits/generated": -0.6016499400138855, |
|
"logits/real": -0.685789942741394, |
|
"logps/generated": -747.66748046875, |
|
"logps/real": -175.3254852294922, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.80033874511719, |
|
"rewards/margins": 33.334190368652344, |
|
"rewards/real": -0.46614784002304077, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.264218009478673e-07, |
|
"logits/generated": -0.514714777469635, |
|
"logits/real": -0.729649543762207, |
|
"logps/generated": -599.8255615234375, |
|
"logps/real": -137.29776000976562, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.55307960510254, |
|
"rewards/margins": 21.553081512451172, |
|
"rewards/real": 2.1871178432775196e-06, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2523696682464456e-07, |
|
"logits/generated": -0.44834762811660767, |
|
"logits/real": -0.662898600101471, |
|
"logps/generated": -626.8858642578125, |
|
"logps/real": -151.83450317382812, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.125072479248047, |
|
"rewards/margins": 20.952497482299805, |
|
"rewards/real": -0.17257389426231384, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.240521327014218e-07, |
|
"logits/generated": -0.5126262307167053, |
|
"logits/real": -0.6713369488716125, |
|
"logps/generated": -653.6041259765625, |
|
"logps/real": -159.56890869140625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.021862030029297, |
|
"rewards/margins": 22.88108253479004, |
|
"rewards/real": -0.14077897369861603, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.22867298578199e-07, |
|
"logits/generated": -0.4650425910949707, |
|
"logits/real": -0.763433575630188, |
|
"logps/generated": -658.9270629882812, |
|
"logps/real": -135.75672912597656, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.407550811767578, |
|
"rewards/margins": 24.228670120239258, |
|
"rewards/real": -0.1788794994354248, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.216824644549763e-07, |
|
"logits/generated": -0.4908994138240814, |
|
"logits/real": -0.6465893983840942, |
|
"logps/generated": -632.4884033203125, |
|
"logps/real": -145.31004333496094, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.62966537475586, |
|
"rewards/margins": 21.41011619567871, |
|
"rewards/real": -0.2195475846529007, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2049763033175355e-07, |
|
"logits/generated": -0.4437866806983948, |
|
"logits/real": -0.6466466188430786, |
|
"logps/generated": -626.27294921875, |
|
"logps/real": -149.35035705566406, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.251583099365234, |
|
"rewards/margins": 22.10491371154785, |
|
"rewards/real": -0.1466691941022873, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1931279620853077e-07, |
|
"logits/generated": -0.47175711393356323, |
|
"logits/real": -0.6160026788711548, |
|
"logps/generated": -677.9451904296875, |
|
"logps/real": -135.2311553955078, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.66263771057129, |
|
"rewards/margins": 24.36861801147461, |
|
"rewards/real": -0.2940204441547394, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1812796208530804e-07, |
|
"logits/generated": -0.5612315535545349, |
|
"logits/real": -0.6705020666122437, |
|
"logps/generated": -676.8372802734375, |
|
"logps/real": -168.85000610351562, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.101232528686523, |
|
"rewards/margins": 26.762081146240234, |
|
"rewards/real": -0.3391529619693756, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.169431279620853e-07, |
|
"logits/generated": -0.4795566201210022, |
|
"logits/real": -0.6562764644622803, |
|
"logps/generated": -666.4014282226562, |
|
"logps/real": -148.19837951660156, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.7773494720459, |
|
"rewards/margins": 25.312881469726562, |
|
"rewards/real": -0.464468777179718, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1575829383886253e-07, |
|
"logits/generated": -0.4656401574611664, |
|
"logits/real": -0.6328948736190796, |
|
"logps/generated": -658.664306640625, |
|
"logps/real": -155.73130798339844, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.921737670898438, |
|
"rewards/margins": 26.695215225219727, |
|
"rewards/real": -0.22652335464954376, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.145734597156398e-07, |
|
"logits/generated": -0.4437786936759949, |
|
"logits/real": -0.6895097494125366, |
|
"logps/generated": -638.5018310546875, |
|
"logps/real": -140.78604125976562, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.6170654296875, |
|
"rewards/margins": 25.46429443359375, |
|
"rewards/real": -0.15277239680290222, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1338862559241703e-07, |
|
"logits/generated": -0.4505455493927002, |
|
"logits/real": -0.6155862808227539, |
|
"logps/generated": -652.1417236328125, |
|
"logps/real": -166.3102569580078, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.8651065826416, |
|
"rewards/margins": 25.632049560546875, |
|
"rewards/real": -0.2330542355775833, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.122037914691943e-07, |
|
"logits/generated": -0.5029697418212891, |
|
"logits/real": -0.6487875580787659, |
|
"logps/generated": -634.719482421875, |
|
"logps/real": -166.7958526611328, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.444910049438477, |
|
"rewards/margins": 22.197734832763672, |
|
"rewards/real": -0.24717314541339874, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.110189573459715e-07, |
|
"logits/generated": -0.4739890992641449, |
|
"logits/real": -0.7358786463737488, |
|
"logps/generated": -627.636474609375, |
|
"logps/real": -128.64334106445312, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.257991790771484, |
|
"rewards/margins": 23.142520904541016, |
|
"rewards/real": -0.11547265946865082, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.098341232227488e-07, |
|
"logits/generated": -0.49049538373947144, |
|
"logits/real": -0.7324908971786499, |
|
"logps/generated": -697.52392578125, |
|
"logps/real": -150.76388549804688, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.034936904907227, |
|
"rewards/margins": 26.84500503540039, |
|
"rewards/real": -0.18993662297725677, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0864928909952607e-07, |
|
"logits/generated": -0.5197226405143738, |
|
"logits/real": -0.732746958732605, |
|
"logps/generated": -697.392333984375, |
|
"logps/real": -138.52279663085938, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.976947784423828, |
|
"rewards/margins": 27.898412704467773, |
|
"rewards/real": -0.0785362496972084, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.074644549763033e-07, |
|
"logits/generated": -0.5259883403778076, |
|
"logits/real": -0.6905041933059692, |
|
"logps/generated": -631.3746337890625, |
|
"logps/real": -171.46287536621094, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.118450164794922, |
|
"rewards/margins": 23.003276824951172, |
|
"rewards/real": -0.11517591774463654, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0627962085308056e-07, |
|
"logits/generated": -0.4914798140525818, |
|
"logits/real": -0.6694945096969604, |
|
"logps/generated": -665.9325561523438, |
|
"logps/real": -152.16909790039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.65480613708496, |
|
"rewards/margins": 26.53061294555664, |
|
"rewards/real": -0.12419945001602173, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0509478672985783e-07, |
|
"logits/generated": -0.435981810092926, |
|
"logits/real": -0.6437792181968689, |
|
"logps/generated": -639.2318115234375, |
|
"logps/real": -139.5286102294922, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.267459869384766, |
|
"rewards/margins": 23.242870330810547, |
|
"rewards/real": -0.024588558822870255, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0390995260663505e-07, |
|
"logits/generated": -0.46243348717689514, |
|
"logits/real": -0.6304915547370911, |
|
"logps/generated": -671.2921142578125, |
|
"logps/real": -151.30206298828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.752239227294922, |
|
"rewards/margins": 25.52545738220215, |
|
"rewards/real": -0.2267828732728958, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0272511848341227e-07, |
|
"logits/generated": -0.4131905436515808, |
|
"logits/real": -0.6179688572883606, |
|
"logps/generated": -685.4735107421875, |
|
"logps/real": -166.12875366210938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.9171085357666, |
|
"rewards/margins": 27.653793334960938, |
|
"rewards/real": -0.2633177638053894, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0154028436018954e-07, |
|
"logits/generated": -0.45457887649536133, |
|
"logits/real": -0.7321020364761353, |
|
"logps/generated": -687.7710571289062, |
|
"logps/real": -118.0494384765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.879419326782227, |
|
"rewards/margins": 27.65741539001465, |
|
"rewards/real": -0.22200465202331543, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.003554502369668e-07, |
|
"logits/generated": -0.4731278419494629, |
|
"logits/real": -0.6649892926216125, |
|
"logps/generated": -702.4985961914062, |
|
"logps/real": -164.13600158691406, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.949352264404297, |
|
"rewards/margins": 29.615692138671875, |
|
"rewards/real": -0.33366328477859497, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.991706161137441e-07, |
|
"logits/generated": -0.5154431462287903, |
|
"logits/real": -0.6099938750267029, |
|
"logps/generated": -733.9407958984375, |
|
"logps/real": -174.83822631835938, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.6252384185791, |
|
"rewards/margins": 31.47017478942871, |
|
"rewards/real": -0.15506593883037567, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.979857819905213e-07, |
|
"logits/generated": -0.47972407937049866, |
|
"logits/real": -0.7196077108383179, |
|
"logps/generated": -695.8734741210938, |
|
"logps/real": -144.66249084472656, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.702346801757812, |
|
"rewards/margins": 27.56471824645996, |
|
"rewards/real": -0.13762858510017395, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968009478672986e-07, |
|
"logits/generated": -0.406221866607666, |
|
"logits/real": -0.6687533259391785, |
|
"logps/generated": -655.7437744140625, |
|
"logps/real": -135.68966674804688, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.703540802001953, |
|
"rewards/margins": 28.489782333374023, |
|
"rewards/real": -0.21375396847724915, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9561611374407585e-07, |
|
"logits/generated": -0.45106711983680725, |
|
"logits/real": -0.6919107437133789, |
|
"logps/generated": -715.7987060546875, |
|
"logps/real": -138.26852416992188, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.545223236083984, |
|
"rewards/margins": 32.39795684814453, |
|
"rewards/real": -0.1472676545381546, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.94431279620853e-07, |
|
"logits/generated": -0.48046213388442993, |
|
"logits/real": -0.5451101064682007, |
|
"logps/generated": -697.7542724609375, |
|
"logps/real": -152.2278594970703, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.348682403564453, |
|
"rewards/margins": 29.207202911376953, |
|
"rewards/real": -0.14147798717021942, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.932464454976303e-07, |
|
"logits/generated": -0.4254804253578186, |
|
"logits/real": -0.6588962078094482, |
|
"logps/generated": -705.3590087890625, |
|
"logps/real": -152.607421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.081212997436523, |
|
"rewards/margins": 30.919677734375, |
|
"rewards/real": -0.16153457760810852, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9206161137440757e-07, |
|
"logits/generated": -0.48913320899009705, |
|
"logits/real": -0.6368371248245239, |
|
"logps/generated": -745.2406005859375, |
|
"logps/real": -166.21762084960938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.84128952026367, |
|
"rewards/margins": 33.531410217285156, |
|
"rewards/real": -0.30988219380378723, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9087677725118484e-07, |
|
"logits/generated": -0.4334734380245209, |
|
"logits/real": -0.5950613021850586, |
|
"logps/generated": -713.3173217773438, |
|
"logps/real": -172.47543334960938, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.074203491210938, |
|
"rewards/margins": 30.784961700439453, |
|
"rewards/real": -0.28924185037612915, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8969194312796206e-07, |
|
"logits/generated": -0.49943074584007263, |
|
"logits/real": -0.6621376276016235, |
|
"logps/generated": -750.9681396484375, |
|
"logps/real": -154.4829864501953, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.702816009521484, |
|
"rewards/margins": 32.558631896972656, |
|
"rewards/real": -0.14418402314186096, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8850710900473933e-07, |
|
"logits/generated": -0.4399910867214203, |
|
"logits/real": -0.6627537608146667, |
|
"logps/generated": -712.4768676757812, |
|
"logps/real": -149.22299194335938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.581218719482422, |
|
"rewards/margins": 30.372350692749023, |
|
"rewards/real": -0.2088705599308014, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.873222748815166e-07, |
|
"logits/generated": -0.4090496897697449, |
|
"logits/real": -0.6347898244857788, |
|
"logps/generated": -691.2296752929688, |
|
"logps/real": -159.6715545654297, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.685245513916016, |
|
"rewards/margins": 28.553665161132812, |
|
"rewards/real": -0.13157956302165985, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8613744075829377e-07, |
|
"logits/generated": -0.44337087869644165, |
|
"logits/real": -0.7070174813270569, |
|
"logps/generated": -725.4908447265625, |
|
"logps/real": -135.2198486328125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.43515396118164, |
|
"rewards/margins": 31.318073272705078, |
|
"rewards/real": -0.11708203703165054, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8495260663507104e-07, |
|
"logits/generated": -0.45473846793174744, |
|
"logits/real": -0.7236835360527039, |
|
"logps/generated": -727.946533203125, |
|
"logps/real": -142.0230712890625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.88336944580078, |
|
"rewards/margins": 33.610408782958984, |
|
"rewards/real": -0.27295243740081787, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.837677725118483e-07, |
|
"logits/generated": -0.5566205978393555, |
|
"logits/real": -0.7040198445320129, |
|
"logps/generated": -782.2391357421875, |
|
"logps/real": -159.59271240234375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.38234329223633, |
|
"rewards/margins": 36.12818908691406, |
|
"rewards/real": -0.2541573643684387, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.825829383886256e-07, |
|
"logits/generated": -0.4843382239341736, |
|
"logits/real": -0.7185007929801941, |
|
"logps/generated": -782.1573486328125, |
|
"logps/real": -133.78878784179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.17876434326172, |
|
"rewards/margins": 36.1375846862793, |
|
"rewards/real": -0.04117864370346069, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.813981042654028e-07, |
|
"logits/generated": -0.527305006980896, |
|
"logits/real": -0.750108540058136, |
|
"logps/generated": -724.2720947265625, |
|
"logps/real": -165.68260192871094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.12436294555664, |
|
"rewards/margins": 34.905982971191406, |
|
"rewards/real": -0.21837835013866425, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.802132701421801e-07, |
|
"logits/generated": -0.4532243609428406, |
|
"logits/real": -0.7372425198554993, |
|
"logps/generated": -707.7957763671875, |
|
"logps/real": -111.7388687133789, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.10491943359375, |
|
"rewards/margins": 30.893756866455078, |
|
"rewards/real": -0.21116304397583008, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.7902843601895736e-07, |
|
"logits/generated": -0.5261486768722534, |
|
"logits/real": -0.7524106502532959, |
|
"logps/generated": -750.0657958984375, |
|
"logps/real": -141.4464111328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.03588104248047, |
|
"rewards/margins": 35.844032287597656, |
|
"rewards/real": -0.19184735417366028, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.778436018957346e-07, |
|
"logits/generated": -0.49053382873535156, |
|
"logits/real": -0.6678867936134338, |
|
"logps/generated": -723.454833984375, |
|
"logps/real": -158.5665283203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.78173828125, |
|
"rewards/margins": 32.50086975097656, |
|
"rewards/real": -0.280868798494339, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.766587677725118e-07, |
|
"logits/generated": -0.5047518014907837, |
|
"logits/real": -0.7079204320907593, |
|
"logps/generated": -738.470947265625, |
|
"logps/real": -146.69248962402344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.998992919921875, |
|
"rewards/margins": 33.81964111328125, |
|
"rewards/real": -0.1793525069952011, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7547393364928907e-07, |
|
"logits/generated": -0.47372421622276306, |
|
"logits/real": -0.7369820475578308, |
|
"logps/generated": -785.177978515625, |
|
"logps/real": -148.13267517089844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.820003509521484, |
|
"rewards/margins": 36.730655670166016, |
|
"rewards/real": -0.08935005962848663, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7428909952606634e-07, |
|
"logits/generated": -0.5118182897567749, |
|
"logits/real": -0.6915109753608704, |
|
"logps/generated": -767.2000732421875, |
|
"logps/real": -178.1002655029297, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.287994384765625, |
|
"rewards/margins": 35.94424057006836, |
|
"rewards/real": -0.3437514007091522, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7310426540284356e-07, |
|
"logits/generated": -0.48033565282821655, |
|
"logits/real": -0.6938971281051636, |
|
"logps/generated": -790.1136474609375, |
|
"logps/real": -139.7992401123047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.80128860473633, |
|
"rewards/margins": 37.25532531738281, |
|
"rewards/real": -0.5459665060043335, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7191943127962083e-07, |
|
"logits/generated": -0.48637381196022034, |
|
"logits/real": -0.6838294863700867, |
|
"logps/generated": -765.7503662109375, |
|
"logps/real": -158.60995483398438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.00304412841797, |
|
"rewards/margins": 34.73870086669922, |
|
"rewards/real": -0.26434019207954407, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.707345971563981e-07, |
|
"logits/generated": -0.44851940870285034, |
|
"logits/real": -0.6709171533584595, |
|
"logps/generated": -771.0245971679688, |
|
"logps/real": -168.75152587890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -38.06503677368164, |
|
"rewards/margins": 37.67142105102539, |
|
"rewards/real": -0.3936167359352112, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.695497630331754e-07, |
|
"logits/generated": -0.45538201928138733, |
|
"logits/real": -0.6043254733085632, |
|
"logps/generated": -774.0435791015625, |
|
"logps/real": -145.94451904296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.95838928222656, |
|
"rewards/margins": 36.758235931396484, |
|
"rewards/real": -0.2001533955335617, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.683649289099526e-07, |
|
"logits/generated": -0.4710386395454407, |
|
"logits/real": -0.6742110252380371, |
|
"logps/generated": -798.041259765625, |
|
"logps/real": -149.87484741210938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -38.644805908203125, |
|
"rewards/margins": 38.51628875732422, |
|
"rewards/real": -0.12851884961128235, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.671800947867298e-07, |
|
"logits/generated": -0.4763055741786957, |
|
"logits/real": -0.6764456629753113, |
|
"logps/generated": -791.0558471679688, |
|
"logps/real": -157.50120544433594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -38.532691955566406, |
|
"rewards/margins": 38.26321029663086, |
|
"rewards/real": -0.26947957277297974, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.659952606635071e-07, |
|
"logits/generated": -0.47692328691482544, |
|
"logits/real": -0.606033980846405, |
|
"logps/generated": -754.9681396484375, |
|
"logps/real": -148.64236450195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.7038459777832, |
|
"rewards/margins": 35.1637077331543, |
|
"rewards/real": -0.5401372313499451, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.648104265402843e-07, |
|
"logits/generated": -0.49335426092147827, |
|
"logits/real": -0.68101966381073, |
|
"logps/generated": -862.1978759765625, |
|
"logps/real": -143.81234741210938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -43.46797180175781, |
|
"rewards/margins": 43.25313186645508, |
|
"rewards/real": -0.21484307944774628, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.636255924170616e-07, |
|
"logits/generated": -0.4993807375431061, |
|
"logits/real": -0.6376734972000122, |
|
"logps/generated": -830.0358276367188, |
|
"logps/real": -173.48251342773438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -42.75886154174805, |
|
"rewards/margins": 42.36219787597656, |
|
"rewards/real": -0.3966585397720337, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6244075829383886e-07, |
|
"logits/generated": -0.4894142746925354, |
|
"logits/real": -0.671286940574646, |
|
"logps/generated": -791.9283447265625, |
|
"logps/real": -157.88449096679688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.980499267578125, |
|
"rewards/margins": 37.63560104370117, |
|
"rewards/real": -0.34489426016807556, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6125592417061613e-07, |
|
"logits/generated": -0.4308968484401703, |
|
"logits/real": -0.6662777066230774, |
|
"logps/generated": -815.94970703125, |
|
"logps/real": -132.9452667236328, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.20854187011719, |
|
"rewards/margins": 40.93267059326172, |
|
"rewards/real": -0.275868684053421, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.6007109004739335e-07, |
|
"logits/generated": -0.44717854261398315, |
|
"logits/real": -0.6490769982337952, |
|
"logps/generated": -752.9622192382812, |
|
"logps/real": -155.48178100585938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.214019775390625, |
|
"rewards/margins": 35.96308135986328, |
|
"rewards/real": -0.25093746185302734, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.588862559241706e-07, |
|
"logits/generated": -0.47657886147499084, |
|
"logits/real": -0.6330237984657288, |
|
"logps/generated": -787.4442749023438, |
|
"logps/real": -159.59701538085938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.82368850708008, |
|
"rewards/margins": 36.52547073364258, |
|
"rewards/real": -0.2982181906700134, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5770142180094784e-07, |
|
"logits/generated": -0.4685605466365814, |
|
"logits/real": -0.6297181844711304, |
|
"logps/generated": -827.0250244140625, |
|
"logps/real": -150.63694763183594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.97758483886719, |
|
"rewards/margins": 41.71283721923828, |
|
"rewards/real": -0.26475200057029724, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5651658767772506e-07, |
|
"logits/generated": -0.45342230796813965, |
|
"logits/real": -0.6486082673072815, |
|
"logps/generated": -749.4054565429688, |
|
"logps/real": -126.46002197265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.42198944091797, |
|
"rewards/margins": 35.160091400146484, |
|
"rewards/real": -0.26189571619033813, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5533175355450234e-07, |
|
"logits/generated": -0.4470803141593933, |
|
"logits/real": -0.5735016465187073, |
|
"logps/generated": -766.5286865234375, |
|
"logps/real": -174.92886352539062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -39.0324592590332, |
|
"rewards/margins": 38.663352966308594, |
|
"rewards/real": -0.3691008687019348, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.541469194312796e-07, |
|
"logits/generated": -0.47693657875061035, |
|
"logits/real": -0.7134417295455933, |
|
"logps/generated": -820.4762573242188, |
|
"logps/real": -122.05989837646484, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -43.139793395996094, |
|
"rewards/margins": 42.87862014770508, |
|
"rewards/real": -0.26117831468582153, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.529620853080569e-07, |
|
"logits/generated": -0.45882320404052734, |
|
"logits/real": -0.664508044719696, |
|
"logps/generated": -831.1781005859375, |
|
"logps/real": -147.54002380371094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.169921875, |
|
"rewards/margins": 40.772850036621094, |
|
"rewards/real": -0.39707642793655396, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.517772511848341e-07, |
|
"logits/generated": -0.4301510453224182, |
|
"logits/real": -0.6589769124984741, |
|
"logps/generated": -819.22119140625, |
|
"logps/real": -183.15982055664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.374855041503906, |
|
"rewards/margins": 41.15515899658203, |
|
"rewards/real": -0.2196962833404541, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.505924170616114e-07, |
|
"logits/generated": -0.4489319920539856, |
|
"logits/real": -0.6075456738471985, |
|
"logps/generated": -850.15673828125, |
|
"logps/real": -150.83221435546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -44.46028137207031, |
|
"rewards/margins": 43.98347473144531, |
|
"rewards/real": -0.47680991888046265, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.4940758293838865e-07, |
|
"logits/generated": -0.4293234944343567, |
|
"logits/real": -0.7189976572990417, |
|
"logps/generated": -835.0768432617188, |
|
"logps/real": -131.94656372070312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -42.448909759521484, |
|
"rewards/margins": 42.11590576171875, |
|
"rewards/real": -0.3330024182796478, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.482227488151658e-07, |
|
"logits/generated": -0.41782283782958984, |
|
"logits/real": -0.5898563265800476, |
|
"logps/generated": -835.0525512695312, |
|
"logps/real": -165.47747802734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -43.286441802978516, |
|
"rewards/margins": 42.87544631958008, |
|
"rewards/real": -0.4109969735145569, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.470379146919431e-07, |
|
"logits/generated": -0.4614785313606262, |
|
"logits/real": -0.5493655204772949, |
|
"logps/generated": -817.9602661132812, |
|
"logps/real": -168.73268127441406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -40.68376159667969, |
|
"rewards/margins": 40.22270202636719, |
|
"rewards/real": -0.4610599875450134, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4585308056872036e-07, |
|
"logits/generated": -0.4272429347038269, |
|
"logits/real": -0.5022194981575012, |
|
"logps/generated": -787.2926635742188, |
|
"logps/real": -167.05831909179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -38.47704315185547, |
|
"rewards/margins": 38.09776306152344, |
|
"rewards/real": -0.37928327918052673, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4466824644549763e-07, |
|
"logits/generated": -0.4905944764614105, |
|
"logits/real": -0.6622756719589233, |
|
"logps/generated": -805.285400390625, |
|
"logps/real": -178.21536254882812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.157222747802734, |
|
"rewards/margins": 40.712562561035156, |
|
"rewards/real": -0.4446594715118408, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4348341232227485e-07, |
|
"logits/generated": -0.4158329367637634, |
|
"logits/real": -0.6186385750770569, |
|
"logps/generated": -749.114501953125, |
|
"logps/real": -156.46290588378906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.60091781616211, |
|
"rewards/margins": 36.17322540283203, |
|
"rewards/real": -0.4276936650276184, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.422985781990521e-07, |
|
"logits/generated": -0.4725651144981384, |
|
"logits/real": -0.652617335319519, |
|
"logps/generated": -847.3025512695312, |
|
"logps/real": -150.90065002441406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -43.543460845947266, |
|
"rewards/margins": 42.95781326293945, |
|
"rewards/real": -0.5856472253799438, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.411137440758294e-07, |
|
"logits/generated": -0.4408513605594635, |
|
"logits/real": -0.6833234429359436, |
|
"logps/generated": -768.7931518554688, |
|
"logps/real": -134.6144256591797, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.13344955444336, |
|
"rewards/margins": 36.78771209716797, |
|
"rewards/real": -0.3457415997982025, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.3992890995260667e-07, |
|
"logits/generated": -0.4581897258758545, |
|
"logits/real": -0.595461368560791, |
|
"logps/generated": -827.2222900390625, |
|
"logps/real": -174.0118408203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -42.74081039428711, |
|
"rewards/margins": 42.331790924072266, |
|
"rewards/real": -0.40901678800582886, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3874407582938384e-07, |
|
"logits/generated": -0.45837849378585815, |
|
"logits/real": -0.6876403093338013, |
|
"logps/generated": -833.8259887695312, |
|
"logps/real": -141.63064575195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.820953369140625, |
|
"rewards/margins": 41.39267349243164, |
|
"rewards/real": -0.42827802896499634, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375592417061611e-07, |
|
"logits/generated": -0.48998793959617615, |
|
"logits/real": -0.6868919730186462, |
|
"logps/generated": -858.2042846679688, |
|
"logps/real": -145.3782196044922, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -44.131046295166016, |
|
"rewards/margins": 43.59005355834961, |
|
"rewards/real": -0.5409911870956421, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.363744075829384e-07, |
|
"logits/generated": -0.4100268483161926, |
|
"logits/real": -0.694664478302002, |
|
"logps/generated": -809.3135375976562, |
|
"logps/real": -166.88694763183594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.03974151611328, |
|
"rewards/margins": 40.51675033569336, |
|
"rewards/real": -0.5229931473731995, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.351895734597156e-07, |
|
"logits/generated": -0.49493294954299927, |
|
"logits/real": -0.6615623235702515, |
|
"logps/generated": -918.6854248046875, |
|
"logps/real": -144.638671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.01395797729492, |
|
"rewards/margins": 51.5162467956543, |
|
"rewards/real": -0.4977096915245056, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.340047393364929e-07, |
|
"logits/generated": -0.3965403735637665, |
|
"logits/real": -0.6068152189254761, |
|
"logps/generated": -797.3876342773438, |
|
"logps/real": -138.4888458251953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -39.271629333496094, |
|
"rewards/margins": 38.95609664916992, |
|
"rewards/real": -0.3155314326286316, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.3281990521327015e-07, |
|
"logits/generated": -0.43841552734375, |
|
"logits/real": -0.5667222738265991, |
|
"logps/generated": -804.7280883789062, |
|
"logps/real": -173.30206298828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -39.95454788208008, |
|
"rewards/margins": 39.456111907958984, |
|
"rewards/real": -0.49843597412109375, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.316350710900474e-07, |
|
"logits/generated": -0.38486871123313904, |
|
"logits/real": -0.5955111384391785, |
|
"logps/generated": -779.2044677734375, |
|
"logps/real": -138.0015869140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -38.943260192871094, |
|
"rewards/margins": 38.430908203125, |
|
"rewards/real": -0.5123514533042908, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.304502369668246e-07, |
|
"logits/generated": -0.4344411790370941, |
|
"logits/real": -0.662503182888031, |
|
"logps/generated": -789.8323974609375, |
|
"logps/real": -138.18624877929688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -39.464324951171875, |
|
"rewards/margins": 39.04804229736328, |
|
"rewards/real": -0.41627994179725647, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2926540284360186e-07, |
|
"logits/generated": -0.40550222992897034, |
|
"logits/real": -0.6265038251876831, |
|
"logps/generated": -832.6241455078125, |
|
"logps/real": -186.16464233398438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -42.117034912109375, |
|
"rewards/margins": 41.634342193603516, |
|
"rewards/real": -0.48269376158714294, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2808056872037913e-07, |
|
"logits/generated": -0.4908333718776703, |
|
"logits/real": -0.6979160904884338, |
|
"logps/generated": -800.664794921875, |
|
"logps/real": -144.0208740234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -39.349517822265625, |
|
"rewards/margins": 38.85806655883789, |
|
"rewards/real": -0.491449773311615, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2689573459715635e-07, |
|
"logits/generated": -0.4737107753753662, |
|
"logits/real": -0.6124163866043091, |
|
"logps/generated": -789.6759643554688, |
|
"logps/real": -173.1675262451172, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -41.43712615966797, |
|
"rewards/margins": 40.75090789794922, |
|
"rewards/real": -0.6862186193466187, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2571090047393363e-07, |
|
"logits/generated": -0.4662472605705261, |
|
"logits/real": -0.7021108865737915, |
|
"logps/generated": -912.7789916992188, |
|
"logps/real": -144.39654541015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -50.74687576293945, |
|
"rewards/margins": 50.47743606567383, |
|
"rewards/real": -0.26943859457969666, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.245260663507109e-07, |
|
"logits/generated": -0.5851739645004272, |
|
"logits/real": -0.708136260509491, |
|
"logps/generated": -838.8018798828125, |
|
"logps/real": -151.24722290039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -44.7834358215332, |
|
"rewards/margins": 44.43694305419922, |
|
"rewards/real": -0.3464917838573456, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2334123222748817e-07, |
|
"logits/generated": -0.5089236497879028, |
|
"logits/real": -0.6847448945045471, |
|
"logps/generated": -894.1632690429688, |
|
"logps/real": -151.0913543701172, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -48.116729736328125, |
|
"rewards/margins": 47.81326675415039, |
|
"rewards/real": -0.3034594655036926, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.221563981042654e-07, |
|
"logits/generated": -0.4521718919277191, |
|
"logits/real": -0.609528660774231, |
|
"logps/generated": -829.3768310546875, |
|
"logps/real": -167.9632568359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -43.617408752441406, |
|
"rewards/margins": 43.08763885498047, |
|
"rewards/real": -0.5297662019729614, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.209715639810426e-07, |
|
"logits/generated": -0.5104943513870239, |
|
"logits/real": -0.6578128933906555, |
|
"logps/generated": -897.2025146484375, |
|
"logps/real": -146.1173858642578, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -50.29200744628906, |
|
"rewards/margins": 49.76136016845703, |
|
"rewards/real": -0.5306479930877686, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.197867298578199e-07, |
|
"logits/generated": -0.453556627035141, |
|
"logits/real": -0.6318106651306152, |
|
"logps/generated": -901.5166015625, |
|
"logps/real": -137.2063751220703, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -49.143409729003906, |
|
"rewards/margins": 48.74793243408203, |
|
"rewards/real": -0.3954845070838928, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.186018957345971e-07, |
|
"logits/generated": -0.530498743057251, |
|
"logits/real": -0.6793403029441833, |
|
"logps/generated": -878.6456909179688, |
|
"logps/real": -138.333251953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -47.53958511352539, |
|
"rewards/margins": 47.25537872314453, |
|
"rewards/real": -0.2842068076133728, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.174170616113744e-07, |
|
"logits/generated": -0.5185251832008362, |
|
"logits/real": -0.6888160109519958, |
|
"logps/generated": -882.78076171875, |
|
"logps/real": -149.84170532226562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -46.70854187011719, |
|
"rewards/margins": 46.299781799316406, |
|
"rewards/real": -0.40876227617263794, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.1623222748815165e-07, |
|
"logits/generated": -0.5407160520553589, |
|
"logits/real": -0.6995197534561157, |
|
"logps/generated": -925.1043701171875, |
|
"logps/real": -172.48684692382812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.484397888183594, |
|
"rewards/margins": 52.23331832885742, |
|
"rewards/real": -0.2510821521282196, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.150473933649289e-07, |
|
"logits/generated": -0.47896209359169006, |
|
"logits/real": -0.648679792881012, |
|
"logps/generated": -855.0648193359375, |
|
"logps/real": -140.78317260742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -44.74834442138672, |
|
"rewards/margins": 44.42583084106445, |
|
"rewards/real": -0.3225128650665283, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1386255924170614e-07, |
|
"logits/generated": -0.48660707473754883, |
|
"logits/real": -0.6823971271514893, |
|
"logps/generated": -855.0968627929688, |
|
"logps/real": -149.41519165039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -45.171939849853516, |
|
"rewards/margins": 44.90800857543945, |
|
"rewards/real": -0.26393207907676697, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.126777251184834e-07, |
|
"logits/generated": -0.4488789141178131, |
|
"logits/real": -0.6946064233779907, |
|
"logps/generated": -834.7445068359375, |
|
"logps/real": -149.4184112548828, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -44.987037658691406, |
|
"rewards/margins": 44.69217300415039, |
|
"rewards/real": -0.29486605525016785, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.1149289099526064e-07, |
|
"logits/generated": -0.46628251671791077, |
|
"logits/real": -0.6745079159736633, |
|
"logps/generated": -957.2546997070312, |
|
"logps/real": -154.1484832763672, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -53.21331024169922, |
|
"rewards/margins": 52.879371643066406, |
|
"rewards/real": -0.33394068479537964, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.103080568720379e-07, |
|
"logits/generated": -0.49304255843162537, |
|
"logits/real": -0.6859273314476013, |
|
"logps/generated": -900.3800659179688, |
|
"logps/real": -166.83229064941406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -48.62682342529297, |
|
"rewards/margins": 48.23986053466797, |
|
"rewards/real": -0.3869660794734955, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0912322274881513e-07, |
|
"logits/generated": -0.532124400138855, |
|
"logits/real": -0.6791267991065979, |
|
"logps/generated": -867.7174072265625, |
|
"logps/real": -173.84207153320312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -45.860843658447266, |
|
"rewards/margins": 45.43999481201172, |
|
"rewards/real": -0.4208555817604065, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.079383886255924e-07, |
|
"logits/generated": -0.5052396655082703, |
|
"logits/real": -0.6648889780044556, |
|
"logps/generated": -909.2999267578125, |
|
"logps/real": -178.1378631591797, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -47.73103332519531, |
|
"rewards/margins": 47.32225799560547, |
|
"rewards/real": -0.40877556800842285, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.067535545023697e-07, |
|
"logits/generated": -0.5442999601364136, |
|
"logits/real": -0.7468653917312622, |
|
"logps/generated": -960.0681762695312, |
|
"logps/real": -151.3079071044922, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -56.67901611328125, |
|
"rewards/margins": 56.42626953125, |
|
"rewards/real": -0.25274744629859924, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.055687203791469e-07, |
|
"logits/generated": -0.49901169538497925, |
|
"logits/real": -0.6442614793777466, |
|
"logps/generated": -879.4744873046875, |
|
"logps/real": -180.48049926757812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -47.23583221435547, |
|
"rewards/margins": 46.76594161987305, |
|
"rewards/real": -0.4698910713195801, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0438388625592417e-07, |
|
"logits/generated": -0.4474611282348633, |
|
"logits/real": -0.6742789149284363, |
|
"logps/generated": -857.6492309570312, |
|
"logps/real": -136.08782958984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -43.95075988769531, |
|
"rewards/margins": 43.52191925048828, |
|
"rewards/real": -0.4288388788700104, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0319905213270144e-07, |
|
"logits/generated": -0.42652368545532227, |
|
"logits/real": -0.6252545118331909, |
|
"logps/generated": -812.95361328125, |
|
"logps/real": -163.1314239501953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -42.40178680419922, |
|
"rewards/margins": 42.024620056152344, |
|
"rewards/real": -0.37716203927993774, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0201421800947866e-07, |
|
"logits/generated": -0.41971296072006226, |
|
"logits/real": -0.6229659914970398, |
|
"logps/generated": -934.0114135742188, |
|
"logps/real": -141.24195861816406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -51.96159744262695, |
|
"rewards/margins": 51.59962844848633, |
|
"rewards/real": -0.3619686961174011, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.008293838862559e-07, |
|
"logits/generated": -0.5068638324737549, |
|
"logits/real": -0.7758525609970093, |
|
"logps/generated": -907.1189575195312, |
|
"logps/real": -138.0588836669922, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -47.612152099609375, |
|
"rewards/margins": 47.226993560791016, |
|
"rewards/real": -0.38516414165496826, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9964454976303315e-07, |
|
"logits/generated": -0.4722062945365906, |
|
"logits/real": -0.6450417041778564, |
|
"logps/generated": -845.0633544921875, |
|
"logps/real": -173.4315185546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -44.63590621948242, |
|
"rewards/margins": 44.2160530090332, |
|
"rewards/real": -0.41985201835632324, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.984597156398104e-07, |
|
"logits/generated": -0.49884462356567383, |
|
"logits/real": -0.6271129846572876, |
|
"logps/generated": -924.2891845703125, |
|
"logps/real": -160.28036499023438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.93656539916992, |
|
"rewards/margins": 52.53998947143555, |
|
"rewards/real": -0.3965730667114258, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9727488151658765e-07, |
|
"logits/generated": -0.48375964164733887, |
|
"logits/real": -0.7125850915908813, |
|
"logps/generated": -971.89404296875, |
|
"logps/real": -155.76739501953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -57.2780876159668, |
|
"rewards/margins": 56.74763870239258, |
|
"rewards/real": -0.5304462909698486, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.960900473933649e-07, |
|
"logits/generated": -0.5001789927482605, |
|
"logits/real": -0.6946722269058228, |
|
"logps/generated": -875.4529418945312, |
|
"logps/real": -136.772216796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -45.757354736328125, |
|
"rewards/margins": 45.39958953857422, |
|
"rewards/real": -0.3577651083469391, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.949052132701422e-07, |
|
"logits/generated": -0.481309711933136, |
|
"logits/real": -0.6437762975692749, |
|
"logps/generated": -897.0853271484375, |
|
"logps/real": -148.08895874023438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -48.52891540527344, |
|
"rewards/margins": 48.114437103271484, |
|
"rewards/real": -0.4144725203514099, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9372037914691946e-07, |
|
"logits/generated": -0.505331814289093, |
|
"logits/real": -0.7198413014411926, |
|
"logps/generated": -943.2364501953125, |
|
"logps/real": -126.28971099853516, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.058265686035156, |
|
"rewards/margins": 51.653160095214844, |
|
"rewards/real": -0.4051007330417633, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9253554502369663e-07, |
|
"logits/generated": -0.4623163342475891, |
|
"logits/real": -0.6992497444152832, |
|
"logps/generated": -924.9318237304688, |
|
"logps/real": -165.45114135742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.68879318237305, |
|
"rewards/margins": 52.19450759887695, |
|
"rewards/real": -0.4942806363105774, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.913507109004739e-07, |
|
"logits/generated": -0.4906153082847595, |
|
"logits/real": -0.6900730729103088, |
|
"logps/generated": -922.6204223632812, |
|
"logps/real": -129.15237426757812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -51.038394927978516, |
|
"rewards/margins": 50.75798797607422, |
|
"rewards/real": -0.2804059386253357, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.901658767772512e-07, |
|
"logits/generated": -0.4813242554664612, |
|
"logits/real": -0.7062429785728455, |
|
"logps/generated": -821.6672973632812, |
|
"logps/real": -163.94393920898438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -43.79729461669922, |
|
"rewards/margins": 43.21052932739258, |
|
"rewards/real": -0.5867670178413391, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.889810426540284e-07, |
|
"logits/generated": -0.43650323152542114, |
|
"logits/real": -0.723192572593689, |
|
"logps/generated": -875.97412109375, |
|
"logps/real": -134.3582000732422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -48.2892951965332, |
|
"rewards/margins": 47.99460220336914, |
|
"rewards/real": -0.2946951985359192, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779620853080567e-07, |
|
"logits/generated": -0.4641779065132141, |
|
"logits/real": -0.6070187091827393, |
|
"logps/generated": -904.2113037109375, |
|
"logps/real": -158.0858612060547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -51.96845245361328, |
|
"rewards/margins": 51.64441680908203, |
|
"rewards/real": -0.3240307569503784, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8661137440758294e-07, |
|
"logits/generated": -0.543385922908783, |
|
"logits/real": -0.6664692163467407, |
|
"logps/generated": -886.740234375, |
|
"logps/real": -158.68768310546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -48.016876220703125, |
|
"rewards/margins": 47.88544464111328, |
|
"rewards/real": -0.13143035769462585, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.854265402843602e-07, |
|
"logits/generated": -0.5325735807418823, |
|
"logits/real": -0.6880441904067993, |
|
"logps/generated": -889.3059692382812, |
|
"logps/real": -143.56271362304688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -48.709747314453125, |
|
"rewards/margins": 48.41456604003906, |
|
"rewards/real": -0.29517900943756104, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.842417061611374e-07, |
|
"logits/generated": -0.5220402479171753, |
|
"logits/real": -0.7008036375045776, |
|
"logps/generated": -901.8049926757812, |
|
"logps/real": -151.60629272460938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -47.60210037231445, |
|
"rewards/margins": 47.190391540527344, |
|
"rewards/real": -0.4117053151130676, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8305687203791465e-07, |
|
"logits/generated": -0.5935906171798706, |
|
"logits/real": -0.8247605562210083, |
|
"logps/generated": -907.3389892578125, |
|
"logps/real": -138.0072479248047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -49.43927001953125, |
|
"rewards/margins": 49.20110321044922, |
|
"rewards/real": -0.23817138373851776, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8187203791469193e-07, |
|
"logits/generated": -0.5039738416671753, |
|
"logits/real": -0.7532294988632202, |
|
"logps/generated": -940.0494995117188, |
|
"logps/real": -136.12376403808594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -55.96204376220703, |
|
"rewards/margins": 55.6636962890625, |
|
"rewards/real": -0.2983424961566925, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.806872037914692e-07, |
|
"logits/generated": -0.5363454818725586, |
|
"logits/real": -0.6708102822303772, |
|
"logps/generated": -924.869140625, |
|
"logps/real": -158.69607543945312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -50.98878860473633, |
|
"rewards/margins": 50.51224899291992, |
|
"rewards/real": -0.4765354096889496, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.795023696682464e-07, |
|
"logits/generated": -0.5462719202041626, |
|
"logits/real": -0.6856478452682495, |
|
"logps/generated": -908.8020629882812, |
|
"logps/real": -161.13491821289062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -50.132118225097656, |
|
"rewards/margins": 49.841583251953125, |
|
"rewards/real": -0.2905333936214447, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.783175355450237e-07, |
|
"logits/generated": -0.5182799696922302, |
|
"logits/real": -0.7454923391342163, |
|
"logps/generated": -868.3385009765625, |
|
"logps/real": -131.3604736328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -47.998130798339844, |
|
"rewards/margins": 47.67443084716797, |
|
"rewards/real": -0.32370421290397644, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7713270142180097e-07, |
|
"logits/generated": -0.46355119347572327, |
|
"logits/real": -0.6795281171798706, |
|
"logps/generated": -893.9700927734375, |
|
"logps/real": -150.07882690429688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -49.319942474365234, |
|
"rewards/margins": 48.991004943847656, |
|
"rewards/real": -0.3289386034011841, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.759478672985782e-07, |
|
"logits/generated": -0.477322518825531, |
|
"logits/real": -0.6399216055870056, |
|
"logps/generated": -928.3748779296875, |
|
"logps/real": -148.43809509277344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -53.18648147583008, |
|
"rewards/margins": 53.1119499206543, |
|
"rewards/real": -0.07453130185604095, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.747630331753554e-07, |
|
"logits/generated": -0.47234511375427246, |
|
"logits/real": -0.5880690217018127, |
|
"logps/generated": -861.2603759765625, |
|
"logps/real": -168.40231323242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -47.96990203857422, |
|
"rewards/margins": 47.553489685058594, |
|
"rewards/real": -0.4164124131202698, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.735781990521327e-07, |
|
"logits/generated": -0.4614683985710144, |
|
"logits/real": -0.5570347309112549, |
|
"logps/generated": -906.6154174804688, |
|
"logps/real": -196.1412811279297, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -49.976844787597656, |
|
"rewards/margins": 49.53584671020508, |
|
"rewards/real": -0.44100189208984375, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7239336492890995e-07, |
|
"logits/generated": -0.49024948477745056, |
|
"logits/real": -0.6667122840881348, |
|
"logps/generated": -1002.1686401367188, |
|
"logps/real": -143.6676788330078, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.17181396484375, |
|
"rewards/margins": 58.65046310424805, |
|
"rewards/real": -0.521342933177948, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7120853080568717e-07, |
|
"logits/generated": -0.4796825051307678, |
|
"logits/real": -0.701050877571106, |
|
"logps/generated": -962.9171752929688, |
|
"logps/real": -143.58999633789062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -54.36942672729492, |
|
"rewards/margins": 54.066436767578125, |
|
"rewards/real": -0.30298811197280884, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7002369668246444e-07, |
|
"logits/generated": -0.49393147230148315, |
|
"logits/real": -0.6975389719009399, |
|
"logps/generated": -1020.3138427734375, |
|
"logps/real": -122.83097839355469, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -60.38806915283203, |
|
"rewards/margins": 60.000091552734375, |
|
"rewards/real": -0.3879725933074951, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.688388625592417e-07, |
|
"logits/generated": -0.4902682900428772, |
|
"logits/real": -0.6860643625259399, |
|
"logps/generated": -1019.5838012695312, |
|
"logps/real": -162.5839080810547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -61.252159118652344, |
|
"rewards/margins": 60.7586669921875, |
|
"rewards/real": -0.49349674582481384, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6765402843601894e-07, |
|
"logits/generated": -0.45780739188194275, |
|
"logits/real": -0.6357568502426147, |
|
"logps/generated": -1011.2120971679688, |
|
"logps/real": -139.41915893554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.23823165893555, |
|
"rewards/margins": 58.670433044433594, |
|
"rewards/real": -0.567794919013977, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.664691943127962e-07, |
|
"logits/generated": -0.4731730818748474, |
|
"logits/real": -0.7192245721817017, |
|
"logps/generated": -993.3472900390625, |
|
"logps/real": -147.27291870117188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -57.850547790527344, |
|
"rewards/margins": 57.4837532043457, |
|
"rewards/real": -0.3668076992034912, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6528436018957343e-07, |
|
"logits/generated": -0.44004377722740173, |
|
"logits/real": -0.6283164620399475, |
|
"logps/generated": -987.6051025390625, |
|
"logps/real": -150.0260009765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.029502868652344, |
|
"rewards/margins": 58.68867111206055, |
|
"rewards/real": -0.34083858132362366, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.640995260663507e-07, |
|
"logits/generated": -0.49537092447280884, |
|
"logits/real": -0.7183640003204346, |
|
"logps/generated": -1005.8214721679688, |
|
"logps/real": -139.62448120117188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -57.904502868652344, |
|
"rewards/margins": 57.10654830932617, |
|
"rewards/real": -0.7979534864425659, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629146919431279e-07, |
|
"logits/generated": -0.4345122277736664, |
|
"logits/real": -0.6478680968284607, |
|
"logps/generated": -1020.6052856445312, |
|
"logps/real": -167.0375213623047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -60.1839599609375, |
|
"rewards/margins": 59.437049865722656, |
|
"rewards/real": -0.7469125986099243, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.617298578199052e-07, |
|
"logits/generated": -0.5310551524162292, |
|
"logits/real": -0.6770363450050354, |
|
"logps/generated": -997.2672119140625, |
|
"logps/real": -142.95449829101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -60.05806350708008, |
|
"rewards/margins": 59.3278923034668, |
|
"rewards/real": -0.7301737666130066, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6054502369668247e-07, |
|
"logits/generated": -0.49158763885498047, |
|
"logits/real": -0.7231532335281372, |
|
"logps/generated": -1001.833984375, |
|
"logps/real": -135.50466918945312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -60.535316467285156, |
|
"rewards/margins": 60.20942306518555, |
|
"rewards/real": -0.3258832097053528, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5936018957345974e-07, |
|
"logits/generated": -0.4792296886444092, |
|
"logits/real": -0.7686340808868408, |
|
"logps/generated": -935.7185668945312, |
|
"logps/real": -144.28750610351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -50.79018020629883, |
|
"rewards/margins": 50.41889190673828, |
|
"rewards/real": -0.37129008769989014, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5817535545023696e-07, |
|
"logits/generated": -0.45573297142982483, |
|
"logits/real": -0.6030322909355164, |
|
"logps/generated": -961.43896484375, |
|
"logps/real": -175.4803466796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -58.09404754638672, |
|
"rewards/margins": 57.536399841308594, |
|
"rewards/real": -0.557651698589325, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5699052132701423e-07, |
|
"logits/generated": -0.4555717408657074, |
|
"logits/real": -0.6126461625099182, |
|
"logps/generated": -971.4552612304688, |
|
"logps/real": -152.1224822998047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -57.64960479736328, |
|
"rewards/margins": 56.96089553833008, |
|
"rewards/real": -0.6887052655220032, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5580568720379145e-07, |
|
"logits/generated": -0.37829676270484924, |
|
"logits/real": -0.6520699262619019, |
|
"logps/generated": -1022.7066650390625, |
|
"logps/real": -145.3723602294922, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -64.4852294921875, |
|
"rewards/margins": 64.01722717285156, |
|
"rewards/real": -0.46799802780151367, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5462085308056867e-07, |
|
"logits/generated": -0.4396567940711975, |
|
"logits/real": -0.6784273982048035, |
|
"logps/generated": -990.3728637695312, |
|
"logps/real": -128.4115447998047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -58.081809997558594, |
|
"rewards/margins": 57.558265686035156, |
|
"rewards/real": -0.523552417755127, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5343601895734595e-07, |
|
"logits/generated": -0.45570698380470276, |
|
"logits/real": -0.6596937775611877, |
|
"logps/generated": -939.7443237304688, |
|
"logps/real": -153.6055145263672, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -53.053611755371094, |
|
"rewards/margins": 52.287078857421875, |
|
"rewards/real": -0.7665325403213501, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.522511848341232e-07, |
|
"logits/generated": -0.46676602959632874, |
|
"logits/real": -0.6501291394233704, |
|
"logps/generated": -935.5755615234375, |
|
"logps/real": -158.39944458007812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -53.2735481262207, |
|
"rewards/margins": 52.781654357910156, |
|
"rewards/real": -0.491886705160141, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.510663507109005e-07, |
|
"logits/generated": -0.40136367082595825, |
|
"logits/real": -0.6050557494163513, |
|
"logps/generated": -951.0081176757812, |
|
"logps/real": -167.49505615234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -54.12383270263672, |
|
"rewards/margins": 53.68292236328125, |
|
"rewards/real": -0.4409194886684418, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.498815165876777e-07, |
|
"logits/generated": -0.42623743414878845, |
|
"logits/real": -0.5959832668304443, |
|
"logps/generated": -1012.42724609375, |
|
"logps/real": -158.5472412109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.254615783691406, |
|
"rewards/margins": 58.894569396972656, |
|
"rewards/real": -0.3600441813468933, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.48696682464455e-07, |
|
"logits/generated": -0.43621063232421875, |
|
"logits/real": -0.6673040390014648, |
|
"logps/generated": -950.0224609375, |
|
"logps/real": -131.24407958984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -55.44426727294922, |
|
"rewards/margins": 54.97258758544922, |
|
"rewards/real": -0.47168129682540894, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.475118483412322e-07, |
|
"logits/generated": -0.4405759871006012, |
|
"logits/real": -0.6971568465232849, |
|
"logps/generated": -993.9846801757812, |
|
"logps/real": -135.9408416748047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -57.888824462890625, |
|
"rewards/margins": 57.3767204284668, |
|
"rewards/real": -0.5121084451675415, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.463270142180095e-07, |
|
"logits/generated": -0.43571940064430237, |
|
"logits/real": -0.5744475722312927, |
|
"logps/generated": -1015.73095703125, |
|
"logps/real": -158.0771484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -61.4149055480957, |
|
"rewards/margins": 60.82659149169922, |
|
"rewards/real": -0.5883184671401978, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.451421800947867e-07, |
|
"logits/generated": -0.4443763196468353, |
|
"logits/real": -0.6300617456436157, |
|
"logps/generated": -948.5537109375, |
|
"logps/real": -138.87777709960938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -53.80131149291992, |
|
"rewards/margins": 53.32474899291992, |
|
"rewards/real": -0.4765622019767761, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4395734597156397e-07, |
|
"logits/generated": -0.4608997702598572, |
|
"logits/real": -0.64490807056427, |
|
"logps/generated": -968.9459228515625, |
|
"logps/real": -141.3875732421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -56.779457092285156, |
|
"rewards/margins": 56.22953414916992, |
|
"rewards/real": -0.5499221682548523, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4277251184834124e-07, |
|
"logits/generated": -0.4236997663974762, |
|
"logits/real": -0.6387981176376343, |
|
"logps/generated": -1061.720947265625, |
|
"logps/real": -142.55154418945312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -65.07857513427734, |
|
"rewards/margins": 64.67992401123047, |
|
"rewards/real": -0.39864128828048706, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4158767772511846e-07, |
|
"logits/generated": -0.4931492805480957, |
|
"logits/real": -0.6415807008743286, |
|
"logps/generated": -936.0066528320312, |
|
"logps/real": -148.57513427734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.94257736206055, |
|
"rewards/margins": 52.371917724609375, |
|
"rewards/real": -0.5706599950790405, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4040284360189573e-07, |
|
"logits/generated": -0.463541179895401, |
|
"logits/real": -0.642052412033081, |
|
"logps/generated": -989.3040771484375, |
|
"logps/real": -163.30599975585938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -58.35044479370117, |
|
"rewards/margins": 57.896202087402344, |
|
"rewards/real": -0.4542439877986908, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.39218009478673e-07, |
|
"logits/generated": -0.43307337164878845, |
|
"logits/real": -0.7004517316818237, |
|
"logps/generated": -1077.06884765625, |
|
"logps/real": -144.35037231445312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -64.49039459228516, |
|
"rewards/margins": 63.91597366333008, |
|
"rewards/real": -0.5744192004203796, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3803317535545023e-07, |
|
"logits/generated": -0.4095051884651184, |
|
"logits/real": -0.6760072112083435, |
|
"logps/generated": -931.9710693359375, |
|
"logps/real": -144.9515380859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -55.00127029418945, |
|
"rewards/margins": 54.52852249145508, |
|
"rewards/real": -0.4727482795715332, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3684834123222747e-07, |
|
"logits/generated": -0.4472767412662506, |
|
"logits/real": -0.5491870641708374, |
|
"logps/generated": -999.7566528320312, |
|
"logps/real": -166.29904174804688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.471107482910156, |
|
"rewards/margins": 58.70185089111328, |
|
"rewards/real": -0.7692559361457825, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3566350710900475e-07, |
|
"logits/generated": -0.41070666909217834, |
|
"logits/real": -0.5636172890663147, |
|
"logps/generated": -1065.25537109375, |
|
"logps/real": -148.3241729736328, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.23228454589844, |
|
"rewards/margins": 66.70288848876953, |
|
"rewards/real": -0.5294026732444763, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3447867298578197e-07, |
|
"logits/generated": -0.46938830614089966, |
|
"logits/real": -0.667069137096405, |
|
"logps/generated": -994.6066284179688, |
|
"logps/real": -160.06761169433594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -60.15541458129883, |
|
"rewards/margins": 59.66267013549805, |
|
"rewards/real": -0.49274301528930664, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3329383886255924e-07, |
|
"logits/generated": -0.4746991991996765, |
|
"logits/real": -0.6093307733535767, |
|
"logps/generated": -992.4873046875, |
|
"logps/real": -160.92660522460938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.234031677246094, |
|
"rewards/margins": 58.680206298828125, |
|
"rewards/real": -0.5538274049758911, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3210900473933649e-07, |
|
"logits/generated": -0.4014422297477722, |
|
"logits/real": -0.6751469373703003, |
|
"logps/generated": -929.3016357421875, |
|
"logps/real": -150.0366973876953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -55.02751922607422, |
|
"rewards/margins": 54.57123565673828, |
|
"rewards/real": -0.4562840461730957, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3092417061611373e-07, |
|
"logits/generated": -0.44085001945495605, |
|
"logits/real": -0.6542856693267822, |
|
"logps/generated": -1020.89404296875, |
|
"logps/real": -162.69509887695312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -62.03368377685547, |
|
"rewards/margins": 61.63750076293945, |
|
"rewards/real": -0.3961876928806305, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2973933649289098e-07, |
|
"logits/generated": -0.45564159750938416, |
|
"logits/real": -0.701114296913147, |
|
"logps/generated": -1048.5789794921875, |
|
"logps/real": -124.47825622558594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -65.25930786132812, |
|
"rewards/margins": 64.69044494628906, |
|
"rewards/real": -0.5688632130622864, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2855450236966822e-07, |
|
"logits/generated": -0.5452786087989807, |
|
"logits/real": -0.6592291593551636, |
|
"logps/generated": -1069.452880859375, |
|
"logps/real": -159.27734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -65.70073699951172, |
|
"rewards/margins": 65.43223571777344, |
|
"rewards/real": -0.26849886775016785, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.273696682464455e-07, |
|
"logits/generated": -0.46791744232177734, |
|
"logits/real": -0.6787043213844299, |
|
"logps/generated": -861.5823974609375, |
|
"logps/real": -147.97743225097656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -45.79942321777344, |
|
"rewards/margins": 45.491188049316406, |
|
"rewards/real": -0.30823782086372375, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2618483412322272e-07, |
|
"logits/generated": -0.41990095376968384, |
|
"logits/real": -0.6832523345947266, |
|
"logps/generated": -932.8555908203125, |
|
"logps/real": -131.0704345703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.52861785888672, |
|
"rewards/margins": 52.190757751464844, |
|
"rewards/real": -0.3378532826900482, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.25e-07, |
|
"logits/generated": -0.4100368916988373, |
|
"logits/real": -0.5835294723510742, |
|
"logps/generated": -1038.96630859375, |
|
"logps/real": -184.62283325195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -62.66301727294922, |
|
"rewards/margins": 62.26544952392578, |
|
"rewards/real": -0.39757412672042847, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2381516587677724e-07, |
|
"logits/generated": -0.44541412591934204, |
|
"logits/real": -0.6417989730834961, |
|
"logps/generated": -942.4816284179688, |
|
"logps/real": -173.33139038085938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -55.49482345581055, |
|
"rewards/margins": 55.00482177734375, |
|
"rewards/real": -0.49000295996665955, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.226303317535545e-07, |
|
"logits/generated": -0.44918951392173767, |
|
"logits/real": -0.6756407618522644, |
|
"logps/generated": -1055.998779296875, |
|
"logps/real": -164.0789337158203, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -64.55057525634766, |
|
"rewards/margins": 63.80998992919922, |
|
"rewards/real": -0.7405800819396973, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2144549763033173e-07, |
|
"logits/generated": -0.3896518051624298, |
|
"logits/real": -0.6300621628761292, |
|
"logps/generated": -971.3762817382812, |
|
"logps/real": -116.8947525024414, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -56.8530158996582, |
|
"rewards/margins": 56.00217819213867, |
|
"rewards/real": -0.8508402705192566, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.20260663507109e-07, |
|
"logits/generated": -0.40912383794784546, |
|
"logits/real": -0.571279764175415, |
|
"logps/generated": -1047.6051025390625, |
|
"logps/real": -176.30783081054688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.89017486572266, |
|
"rewards/margins": 67.03627014160156, |
|
"rewards/real": -0.8538981676101685, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1907582938388625e-07, |
|
"logits/generated": -0.3686346113681793, |
|
"logits/real": -0.6175917387008667, |
|
"logps/generated": -1123.7552490234375, |
|
"logps/real": -157.916748046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.57915496826172, |
|
"rewards/margins": 69.48426055908203, |
|
"rewards/real": -1.0949029922485352, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.178909952606635e-07, |
|
"logits/generated": -0.4202900826931, |
|
"logits/real": -0.6336459517478943, |
|
"logps/generated": -1137.767822265625, |
|
"logps/real": -124.10030364990234, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.68309783935547, |
|
"rewards/margins": 70.78590393066406, |
|
"rewards/real": -0.8971970677375793, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1670616113744074e-07, |
|
"logits/generated": -0.3994109034538269, |
|
"logits/real": -0.634803831577301, |
|
"logps/generated": -1005.7017822265625, |
|
"logps/real": -169.7325439453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.44282150268555, |
|
"rewards/margins": 58.54929733276367, |
|
"rewards/real": -0.8935245275497437, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.15521327014218e-07, |
|
"logits/generated": -0.3749557137489319, |
|
"logits/real": -0.6287773251533508, |
|
"logps/generated": -1105.1219482421875, |
|
"logps/real": -147.91397094726562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.544677734375, |
|
"rewards/margins": 68.75887298583984, |
|
"rewards/real": -0.785801351070404, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1433649289099526e-07, |
|
"logits/generated": -0.4119029641151428, |
|
"logits/real": -0.5460438132286072, |
|
"logps/generated": -1043.18310546875, |
|
"logps/real": -165.08152770996094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -63.2020263671875, |
|
"rewards/margins": 62.2581672668457, |
|
"rewards/real": -0.9438508749008179, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.131516587677725e-07, |
|
"logits/generated": -0.40759310126304626, |
|
"logits/real": -0.5606673955917358, |
|
"logps/generated": -1038.422119140625, |
|
"logps/real": -154.99227905273438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -61.8538818359375, |
|
"rewards/margins": 61.08037567138672, |
|
"rewards/real": -0.77350914478302, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1196682464454975e-07, |
|
"logits/generated": -0.40789279341697693, |
|
"logits/real": -0.6496740579605103, |
|
"logps/generated": -1112.929931640625, |
|
"logps/real": -132.43563842773438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.68193054199219, |
|
"rewards/margins": 69.83362579345703, |
|
"rewards/real": -0.8483074903488159, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.10781990521327e-07, |
|
"logits/generated": -0.37787383794784546, |
|
"logits/real": -0.5792855024337769, |
|
"logps/generated": -992.4094848632812, |
|
"logps/real": -154.80380249023438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -57.6590461730957, |
|
"rewards/margins": 56.728843688964844, |
|
"rewards/real": -0.9301955103874207, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0959715639810427e-07, |
|
"logits/generated": -0.4275107979774475, |
|
"logits/real": -0.6507991552352905, |
|
"logps/generated": -1179.656982421875, |
|
"logps/real": -151.2890167236328, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.20372009277344, |
|
"rewards/margins": 75.1134262084961, |
|
"rewards/real": -1.0902981758117676, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0841232227488152e-07, |
|
"logits/generated": -0.4114235043525696, |
|
"logits/real": -0.6667225360870361, |
|
"logps/generated": -1071.714599609375, |
|
"logps/real": -133.47251892089844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -68.06950378417969, |
|
"rewards/margins": 67.27735137939453, |
|
"rewards/real": -0.7921562790870667, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0722748815165874e-07, |
|
"logits/generated": -0.41574984788894653, |
|
"logits/real": -0.6365878582000732, |
|
"logps/generated": -1119.65087890625, |
|
"logps/real": -142.26431274414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.78968811035156, |
|
"rewards/margins": 70.04450988769531, |
|
"rewards/real": -0.745174765586853, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.06042654028436e-07, |
|
"logits/generated": -0.37902599573135376, |
|
"logits/real": -0.5311517119407654, |
|
"logps/generated": -1179.3172607421875, |
|
"logps/real": -160.26150512695312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.18898010253906, |
|
"rewards/margins": 72.45357513427734, |
|
"rewards/real": -0.7354053258895874, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0485781990521326e-07, |
|
"logits/generated": -0.3815317153930664, |
|
"logits/real": -0.5580254793167114, |
|
"logps/generated": -1023.4977416992188, |
|
"logps/real": -177.36837768554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -62.73035430908203, |
|
"rewards/margins": 61.690452575683594, |
|
"rewards/real": -1.0398961305618286, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0367298578199053e-07, |
|
"logits/generated": -0.44472736120224, |
|
"logits/real": -0.6714332699775696, |
|
"logps/generated": -1140.576416015625, |
|
"logps/real": -164.40725708007812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.26954650878906, |
|
"rewards/margins": 71.59504699707031, |
|
"rewards/real": -0.6745188236236572, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0248815165876775e-07, |
|
"logits/generated": -0.4064870774745941, |
|
"logits/real": -0.6100367903709412, |
|
"logps/generated": -1150.54833984375, |
|
"logps/real": -160.94125366210938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.72151184082031, |
|
"rewards/margins": 74.99349975585938, |
|
"rewards/real": -0.7280232906341553, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0130331753554502e-07, |
|
"logits/generated": -0.4334731698036194, |
|
"logits/real": -0.6273586750030518, |
|
"logps/generated": -1114.251220703125, |
|
"logps/real": -167.9752655029297, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.68013000488281, |
|
"rewards/margins": 68.87593078613281, |
|
"rewards/real": -0.8041984438896179, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0011848341232227e-07, |
|
"logits/generated": -0.37390297651290894, |
|
"logits/real": -0.5925924777984619, |
|
"logps/generated": -1171.7125244140625, |
|
"logps/real": -151.46539306640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.30975341796875, |
|
"rewards/margins": 75.49763488769531, |
|
"rewards/real": -0.8121153712272644, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9893364928909952e-07, |
|
"logits/generated": -0.3266315758228302, |
|
"logits/real": -0.5822888612747192, |
|
"logps/generated": -1063.568115234375, |
|
"logps/real": -150.86004638671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -64.58686828613281, |
|
"rewards/margins": 63.89935302734375, |
|
"rewards/real": -0.6875194311141968, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9774881516587676e-07, |
|
"logits/generated": -0.38277262449264526, |
|
"logits/real": -0.5801711678504944, |
|
"logps/generated": -1175.657470703125, |
|
"logps/real": -141.32138061523438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.63087463378906, |
|
"rewards/margins": 76.90750122070312, |
|
"rewards/real": -0.7233678102493286, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.96563981042654e-07, |
|
"logits/generated": -0.35185354948043823, |
|
"logits/real": -0.6176477670669556, |
|
"logps/generated": -1025.0194091796875, |
|
"logps/real": -130.98770141601562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -62.04143524169922, |
|
"rewards/margins": 61.38140106201172, |
|
"rewards/real": -0.6600403189659119, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9537914691943128e-07, |
|
"logits/generated": -0.40409189462661743, |
|
"logits/real": -0.596308708190918, |
|
"logps/generated": -1096.9437255859375, |
|
"logps/real": -167.45077514648438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -68.10643768310547, |
|
"rewards/margins": 67.09183502197266, |
|
"rewards/real": -1.014599084854126, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9419431279620853e-07, |
|
"logits/generated": -0.356533408164978, |
|
"logits/real": -0.5840550661087036, |
|
"logps/generated": -1120.258056640625, |
|
"logps/real": -153.67636108398438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.5401840209961, |
|
"rewards/margins": 70.77323913574219, |
|
"rewards/real": -0.7669495940208435, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9300947867298577e-07, |
|
"logits/generated": -0.39099499583244324, |
|
"logits/real": -0.5445195436477661, |
|
"logps/generated": -1096.966552734375, |
|
"logps/real": -185.20242309570312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -68.42916107177734, |
|
"rewards/margins": 67.67522430419922, |
|
"rewards/real": -0.7539411187171936, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9182464454976302e-07, |
|
"logits/generated": -0.3674711287021637, |
|
"logits/real": -0.6313090920448303, |
|
"logps/generated": -998.6416015625, |
|
"logps/real": -132.3099365234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.564292907714844, |
|
"rewards/margins": 58.874427795410156, |
|
"rewards/real": -0.6898680329322815, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.906398104265403e-07, |
|
"logits/generated": -0.3422473669052124, |
|
"logits/real": -0.5261892676353455, |
|
"logps/generated": -1169.02880859375, |
|
"logps/real": -149.87451171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.00128173828125, |
|
"rewards/margins": 75.43621063232422, |
|
"rewards/real": -0.5650706887245178, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8945497630331754e-07, |
|
"logits/generated": -0.40371593832969666, |
|
"logits/real": -0.5681861042976379, |
|
"logps/generated": -989.8043823242188, |
|
"logps/real": -161.1966552734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -60.001930236816406, |
|
"rewards/margins": 59.34397506713867, |
|
"rewards/real": -0.6579534411430359, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8827014218009476e-07, |
|
"logits/generated": -0.3659510016441345, |
|
"logits/real": -0.5965005159378052, |
|
"logps/generated": -1124.320068359375, |
|
"logps/real": -151.7841033935547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.14144897460938, |
|
"rewards/margins": 72.28865051269531, |
|
"rewards/real": -0.8528071641921997, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8708530805687203e-07, |
|
"logits/generated": -0.33026427030563354, |
|
"logits/real": -0.5088328719139099, |
|
"logps/generated": -1115.7996826171875, |
|
"logps/real": -135.47061157226562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.01557159423828, |
|
"rewards/margins": 70.09529113769531, |
|
"rewards/real": -0.9202736020088196, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8590047393364928e-07, |
|
"logits/generated": -0.35965341329574585, |
|
"logits/real": -0.5917715430259705, |
|
"logps/generated": -1188.393310546875, |
|
"logps/real": -164.9335174560547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -79.32763671875, |
|
"rewards/margins": 78.70462036132812, |
|
"rewards/real": -0.623020589351654, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8471563981042655e-07, |
|
"logits/generated": -0.3648239076137543, |
|
"logits/real": -0.5664030313491821, |
|
"logps/generated": -1057.7745361328125, |
|
"logps/real": -164.2882080078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -65.43244171142578, |
|
"rewards/margins": 64.76323699951172, |
|
"rewards/real": -0.6691963076591492, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8353080568720377e-07, |
|
"logits/generated": -0.2945231795310974, |
|
"logits/real": -0.539868950843811, |
|
"logps/generated": -1135.911865234375, |
|
"logps/real": -126.64066314697266, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.74871063232422, |
|
"rewards/margins": 74.0860595703125, |
|
"rewards/real": -0.6626566648483276, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8234597156398104e-07, |
|
"logits/generated": -0.32069313526153564, |
|
"logits/real": -0.5126243829727173, |
|
"logps/generated": -1169.849365234375, |
|
"logps/real": -147.9727783203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.32138061523438, |
|
"rewards/margins": 75.6849136352539, |
|
"rewards/real": -0.6364642381668091, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.811611374407583e-07, |
|
"logits/generated": -0.3039155900478363, |
|
"logits/real": -0.5434777140617371, |
|
"logps/generated": -1120.1385498046875, |
|
"logps/real": -150.61309814453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.19499206542969, |
|
"rewards/margins": 71.50361633300781, |
|
"rewards/real": -0.6913769841194153, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7997630331753554e-07, |
|
"logits/generated": -0.33049115538597107, |
|
"logits/real": -0.5453459620475769, |
|
"logps/generated": -1100.5694580078125, |
|
"logps/real": -168.0435791015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.0591812133789, |
|
"rewards/margins": 69.451904296875, |
|
"rewards/real": -0.6072803139686584, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7879146919431278e-07, |
|
"logits/generated": -0.3514329493045807, |
|
"logits/real": -0.5001763105392456, |
|
"logps/generated": -1134.4805908203125, |
|
"logps/real": -180.07357788085938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.09115600585938, |
|
"rewards/margins": 72.2973403930664, |
|
"rewards/real": -0.793817400932312, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7760663507109003e-07, |
|
"logits/generated": -0.27006787061691284, |
|
"logits/real": -0.4615755081176758, |
|
"logps/generated": -1064.807861328125, |
|
"logps/real": -157.30935668945312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.96697998046875, |
|
"rewards/margins": 67.21857452392578, |
|
"rewards/real": -0.7484084963798523, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.764218009478673e-07, |
|
"logits/generated": -0.33314403891563416, |
|
"logits/real": -0.5476816892623901, |
|
"logps/generated": -1177.116943359375, |
|
"logps/real": -133.23471069335938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -78.40288543701172, |
|
"rewards/margins": 77.6571044921875, |
|
"rewards/real": -0.7457820177078247, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7523696682464452e-07, |
|
"logits/generated": -0.3331630825996399, |
|
"logits/real": -0.5389949083328247, |
|
"logps/generated": -1134.275146484375, |
|
"logps/real": -167.80189514160156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.55030059814453, |
|
"rewards/margins": 69.66114807128906, |
|
"rewards/real": -0.8891481161117554, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.740521327014218e-07, |
|
"logits/generated": -0.3793897330760956, |
|
"logits/real": -0.5896502733230591, |
|
"logps/generated": -1118.945068359375, |
|
"logps/real": -142.81280517578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.79044342041016, |
|
"rewards/margins": 71.0679702758789, |
|
"rewards/real": -0.7224776148796082, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7286729857819904e-07, |
|
"logits/generated": -0.34748396277427673, |
|
"logits/real": -0.5819012522697449, |
|
"logps/generated": -1131.2802734375, |
|
"logps/real": -144.39434814453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.58968353271484, |
|
"rewards/margins": 70.8386001586914, |
|
"rewards/real": -0.7510883212089539, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7168246445497631e-07, |
|
"logits/generated": -0.35045960545539856, |
|
"logits/real": -0.5766857862472534, |
|
"logps/generated": -1157.075927734375, |
|
"logps/real": -183.49752807617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.64195251464844, |
|
"rewards/margins": 73.91766357421875, |
|
"rewards/real": -0.7242866158485413, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7049763033175353e-07, |
|
"logits/generated": -0.36225640773773193, |
|
"logits/real": -0.6122447848320007, |
|
"logps/generated": -1231.66064453125, |
|
"logps/real": -159.02023315429688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -81.8363037109375, |
|
"rewards/margins": 81.19940185546875, |
|
"rewards/real": -0.636897623538971, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.693127962085308e-07, |
|
"logits/generated": -0.3631829619407654, |
|
"logits/real": -0.5321738123893738, |
|
"logps/generated": -1131.0858154296875, |
|
"logps/real": -174.25485229492188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.96878051757812, |
|
"rewards/margins": 70.31332397460938, |
|
"rewards/real": -0.6554469466209412, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6812796208530805e-07, |
|
"logits/generated": -0.2980991005897522, |
|
"logits/real": -0.4874647557735443, |
|
"logps/generated": -1105.4827880859375, |
|
"logps/real": -152.40386962890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -66.46055603027344, |
|
"rewards/margins": 65.54535675048828, |
|
"rewards/real": -0.9151935577392578, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.669431279620853e-07, |
|
"logits/generated": -0.28592294454574585, |
|
"logits/real": -0.4887320101261139, |
|
"logps/generated": -1115.578369140625, |
|
"logps/real": -185.43649291992188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.00923156738281, |
|
"rewards/margins": 69.16118621826172, |
|
"rewards/real": -0.8480373620986938, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6575829383886255e-07, |
|
"logits/generated": -0.37043094635009766, |
|
"logits/real": -0.579189658164978, |
|
"logps/generated": -1242.7139892578125, |
|
"logps/real": -150.39120483398438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -83.98506927490234, |
|
"rewards/margins": 83.05586242675781, |
|
"rewards/real": -0.9292176961898804, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.645734597156398e-07, |
|
"logits/generated": -0.28565752506256104, |
|
"logits/real": -0.5570210218429565, |
|
"logps/generated": -1072.3984375, |
|
"logps/real": -139.39593505859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.31278991699219, |
|
"rewards/margins": 68.64981079101562, |
|
"rewards/real": -0.6629735231399536, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6338862559241706e-07, |
|
"logits/generated": -0.36683911085128784, |
|
"logits/real": -0.6102225184440613, |
|
"logps/generated": -1144.6790771484375, |
|
"logps/real": -150.28924560546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.97947692871094, |
|
"rewards/margins": 74.34928894042969, |
|
"rewards/real": -0.6301820278167725, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.622037914691943e-07, |
|
"logits/generated": -0.3236832916736603, |
|
"logits/real": -0.5270904302597046, |
|
"logps/generated": -1184.956787109375, |
|
"logps/real": -147.99168395996094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.87047576904297, |
|
"rewards/margins": 77.00102233886719, |
|
"rewards/real": -0.8694450259208679, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6101895734597156e-07, |
|
"logits/generated": -0.373867928981781, |
|
"logits/real": -0.5834953784942627, |
|
"logps/generated": -1108.7701416015625, |
|
"logps/real": -163.93594360351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.46839141845703, |
|
"rewards/margins": 69.51911926269531, |
|
"rewards/real": -0.9492788314819336, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.598341232227488e-07, |
|
"logits/generated": -0.3623445928096771, |
|
"logits/real": -0.5558930039405823, |
|
"logps/generated": -1033.65771484375, |
|
"logps/real": -145.7788848876953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -63.11626434326172, |
|
"rewards/margins": 62.55774688720703, |
|
"rewards/real": -0.5585171580314636, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5864928909952605e-07, |
|
"logits/generated": -0.3441459834575653, |
|
"logits/real": -0.5500830411911011, |
|
"logps/generated": -1076.542724609375, |
|
"logps/real": -142.98782348632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.62294006347656, |
|
"rewards/margins": 69.04898071289062, |
|
"rewards/real": -0.5739551186561584, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5746445497630332e-07, |
|
"logits/generated": -0.32716110348701477, |
|
"logits/real": -0.5438711047172546, |
|
"logps/generated": -1147.177978515625, |
|
"logps/real": -145.17544555664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.67416381835938, |
|
"rewards/margins": 71.88451385498047, |
|
"rewards/real": -0.7896552085876465, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5627962085308054e-07, |
|
"logits/generated": -0.3573629856109619, |
|
"logits/real": -0.5217954516410828, |
|
"logps/generated": -1130.4320068359375, |
|
"logps/real": -172.75650024414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.90027618408203, |
|
"rewards/margins": 71.09027862548828, |
|
"rewards/real": -0.8099902868270874, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5509478672985782e-07, |
|
"logits/generated": -0.34415721893310547, |
|
"logits/real": -0.5328197479248047, |
|
"logps/generated": -956.2267456054688, |
|
"logps/real": -184.76602172851562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -57.68220901489258, |
|
"rewards/margins": 56.873924255371094, |
|
"rewards/real": -0.8082860708236694, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5390995260663506e-07, |
|
"logits/generated": -0.3490820825099945, |
|
"logits/real": -0.5200581550598145, |
|
"logps/generated": -1079.313720703125, |
|
"logps/real": -158.89695739746094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -68.39738464355469, |
|
"rewards/margins": 67.54957580566406, |
|
"rewards/real": -0.8478103876113892, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5272511848341233e-07, |
|
"logits/generated": -0.32425522804260254, |
|
"logits/real": -0.5338164567947388, |
|
"logps/generated": -1100.94873046875, |
|
"logps/real": -160.40975952148438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.0157699584961, |
|
"rewards/margins": 68.4029312133789, |
|
"rewards/real": -0.6128430366516113, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5154028436018955e-07, |
|
"logits/generated": -0.3767894208431244, |
|
"logits/real": -0.5962772965431213, |
|
"logps/generated": -1119.630615234375, |
|
"logps/real": -164.57717895507812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.62731170654297, |
|
"rewards/margins": 69.80793762207031, |
|
"rewards/real": -0.8193785548210144, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5035545023696683e-07, |
|
"logits/generated": -0.3327923119068146, |
|
"logits/real": -0.5231298804283142, |
|
"logps/generated": -1091.9482421875, |
|
"logps/real": -159.0697021484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.09928894042969, |
|
"rewards/margins": 66.29541015625, |
|
"rewards/real": -0.8038908243179321, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.4917061611374407e-07, |
|
"logits/generated": -0.37766391038894653, |
|
"logits/real": -0.5465415716171265, |
|
"logps/generated": -1140.1834716796875, |
|
"logps/real": -176.43777465820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.69719696044922, |
|
"rewards/margins": 72.85856628417969, |
|
"rewards/real": -0.8386209607124329, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4798578199052132e-07, |
|
"logits/generated": -0.3288564682006836, |
|
"logits/real": -0.6394492983818054, |
|
"logps/generated": -1092.682861328125, |
|
"logps/real": -138.92721557617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -68.9786148071289, |
|
"rewards/margins": 68.30329895019531, |
|
"rewards/real": -0.6753060817718506, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4680094786729857e-07, |
|
"logits/generated": -0.3352881371974945, |
|
"logits/real": -0.5174044370651245, |
|
"logps/generated": -1148.893798828125, |
|
"logps/real": -157.83375549316406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.80946350097656, |
|
"rewards/margins": 74.113037109375, |
|
"rewards/real": -0.6964321136474609, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.456161137440758e-07, |
|
"logits/generated": -0.38338786363601685, |
|
"logits/real": -0.6143153309822083, |
|
"logps/generated": -1172.7335205078125, |
|
"logps/real": -150.6049041748047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.13297271728516, |
|
"rewards/margins": 73.13155364990234, |
|
"rewards/real": -1.0014207363128662, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4443127962085309e-07, |
|
"logits/generated": -0.34384411573410034, |
|
"logits/real": -0.585628867149353, |
|
"logps/generated": -1103.96923828125, |
|
"logps/real": -153.5775604248047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.13743591308594, |
|
"rewards/margins": 69.50370025634766, |
|
"rewards/real": -0.6337412595748901, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4324644549763033e-07, |
|
"logits/generated": -0.34684544801712036, |
|
"logits/real": -0.6211697459220886, |
|
"logps/generated": -1069.9642333984375, |
|
"logps/real": -169.10153198242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.88719177246094, |
|
"rewards/margins": 67.12832641601562, |
|
"rewards/real": -0.7588711380958557, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4206161137440758e-07, |
|
"logits/generated": -0.40978360176086426, |
|
"logits/real": -0.5079740881919861, |
|
"logps/generated": -1203.1060791015625, |
|
"logps/real": -182.2041778564453, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -81.18981170654297, |
|
"rewards/margins": 80.14823913574219, |
|
"rewards/real": -1.0415685176849365, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4087677725118482e-07, |
|
"logits/generated": -0.3429732024669647, |
|
"logits/real": -0.6365989446640015, |
|
"logps/generated": -1202.0032958984375, |
|
"logps/real": -135.09603881835938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -79.1153793334961, |
|
"rewards/margins": 78.39237976074219, |
|
"rewards/real": -0.7229984998703003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.396919431279621e-07, |
|
"logits/generated": -0.3157784044742584, |
|
"logits/real": -0.5285521149635315, |
|
"logps/generated": -1134.604248046875, |
|
"logps/real": -162.54391479492188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.02388763427734, |
|
"rewards/margins": 71.22061157226562, |
|
"rewards/real": -0.8032848238945007, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3850710900473934e-07, |
|
"logits/generated": -0.38203898072242737, |
|
"logits/real": -0.549940824508667, |
|
"logps/generated": -1131.8427734375, |
|
"logps/real": -152.9811248779297, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.25450897216797, |
|
"rewards/margins": 71.60226440429688, |
|
"rewards/real": -0.6522516012191772, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3732227488151656e-07, |
|
"logits/generated": -0.3488486707210541, |
|
"logits/real": -0.39327472448349, |
|
"logps/generated": -1084.085205078125, |
|
"logps/real": -202.0915985107422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.83716583251953, |
|
"rewards/margins": 67.02722930908203, |
|
"rewards/real": -0.8099360466003418, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3613744075829384e-07, |
|
"logits/generated": -0.29847994446754456, |
|
"logits/real": -0.5526587963104248, |
|
"logps/generated": -1222.8707275390625, |
|
"logps/real": -151.45326232910156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -84.52772521972656, |
|
"rewards/margins": 83.77813720703125, |
|
"rewards/real": -0.7495924234390259, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3495260663507108e-07, |
|
"logits/generated": -0.3559364676475525, |
|
"logits/real": -0.5464509725570679, |
|
"logps/generated": -1149.019775390625, |
|
"logps/real": -144.06759643554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.87704467773438, |
|
"rewards/margins": 72.91664123535156, |
|
"rewards/real": -0.9604147672653198, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3376777251184836e-07, |
|
"logits/generated": -0.3057738244533539, |
|
"logits/real": -0.5685640573501587, |
|
"logps/generated": -1133.9283447265625, |
|
"logps/real": -161.63894653320312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.35575103759766, |
|
"rewards/margins": 73.46749114990234, |
|
"rewards/real": -0.8882623910903931, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3258293838862558e-07, |
|
"logits/generated": -0.3922407031059265, |
|
"logits/real": -0.6674994230270386, |
|
"logps/generated": -1132.536865234375, |
|
"logps/real": -146.58091735839844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.54347229003906, |
|
"rewards/margins": 70.5415267944336, |
|
"rewards/real": -1.0019347667694092, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3139810426540285e-07, |
|
"logits/generated": -0.33480846881866455, |
|
"logits/real": -0.5054049491882324, |
|
"logps/generated": -1200.821044921875, |
|
"logps/real": -152.25894165039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.86302185058594, |
|
"rewards/margins": 77.09458923339844, |
|
"rewards/real": -0.7684418559074402, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.302132701421801e-07, |
|
"logits/generated": -0.2758890986442566, |
|
"logits/real": -0.5493889451026917, |
|
"logps/generated": -1093.770751953125, |
|
"logps/real": -157.31973266601562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.96113586425781, |
|
"rewards/margins": 69.14842987060547, |
|
"rewards/real": -0.8127008676528931, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2902843601895734e-07, |
|
"logits/generated": -0.3500101566314697, |
|
"logits/real": -0.5593122243881226, |
|
"logps/generated": -1206.6497802734375, |
|
"logps/real": -159.24282836914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -82.6231689453125, |
|
"rewards/margins": 81.7955551147461, |
|
"rewards/real": -0.8276035189628601, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.278436018957346e-07, |
|
"logits/generated": -0.3821162283420563, |
|
"logits/real": -0.6203012466430664, |
|
"logps/generated": -1269.4385986328125, |
|
"logps/real": -147.83021545410156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -88.19657897949219, |
|
"rewards/margins": 87.15613555908203, |
|
"rewards/real": -1.0404458045959473, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2665876777251183e-07, |
|
"logits/generated": -0.4196929931640625, |
|
"logits/real": -0.6896005868911743, |
|
"logps/generated": -1157.632568359375, |
|
"logps/real": -161.3779754638672, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.62742614746094, |
|
"rewards/margins": 74.22064208984375, |
|
"rewards/real": -0.40678733587265015, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.254739336492891e-07, |
|
"logits/generated": -0.46330398321151733, |
|
"logits/real": -0.6500687003135681, |
|
"logps/generated": -1221.859130859375, |
|
"logps/real": -170.46438598632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -80.08015441894531, |
|
"rewards/margins": 79.51933288574219, |
|
"rewards/real": -0.5608130693435669, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2428909952606635e-07, |
|
"logits/generated": -0.4650397300720215, |
|
"logits/real": -0.6424258351325989, |
|
"logps/generated": -1196.016357421875, |
|
"logps/real": -148.45826721191406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.94972229003906, |
|
"rewards/margins": 76.64620208740234, |
|
"rewards/real": -0.3035140633583069, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.231042654028436e-07, |
|
"logits/generated": -0.4637749195098877, |
|
"logits/real": -0.6913520097732544, |
|
"logps/generated": -1081.8062744140625, |
|
"logps/real": -143.20822143554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -68.7381820678711, |
|
"rewards/margins": 68.34100341796875, |
|
"rewards/real": -0.39719128608703613, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2191943127962085e-07, |
|
"logits/generated": -0.4392772614955902, |
|
"logits/real": -0.5955749750137329, |
|
"logps/generated": -1123.980712890625, |
|
"logps/real": -200.45895385742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.12271881103516, |
|
"rewards/margins": 71.51152038574219, |
|
"rewards/real": -0.6112003326416016, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.207345971563981e-07, |
|
"logits/generated": -0.43859052658081055, |
|
"logits/real": -0.5733956098556519, |
|
"logps/generated": -1162.6119384765625, |
|
"logps/real": -168.77096557617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.10337829589844, |
|
"rewards/margins": 76.67765045166016, |
|
"rewards/real": -0.42572155594825745, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1954976303317534e-07, |
|
"logits/generated": -0.47363168001174927, |
|
"logits/real": -0.6165460348129272, |
|
"logps/generated": -1090.505859375, |
|
"logps/real": -164.0859832763672, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.29020690917969, |
|
"rewards/margins": 69.48956298828125, |
|
"rewards/real": -0.8006424903869629, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.183649289099526e-07, |
|
"logits/generated": -0.41170722246170044, |
|
"logits/real": -0.6370391249656677, |
|
"logps/generated": -1108.5679931640625, |
|
"logps/real": -160.58328247070312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.86441040039062, |
|
"rewards/margins": 71.31538391113281, |
|
"rewards/real": -0.549027681350708, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1718009478672986e-07, |
|
"logits/generated": -0.379972368478775, |
|
"logits/real": -0.6687403321266174, |
|
"logps/generated": -1153.8831787109375, |
|
"logps/real": -136.22607421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.22931671142578, |
|
"rewards/margins": 74.69772338867188, |
|
"rewards/real": -0.5315843820571899, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.159952606635071e-07, |
|
"logits/generated": -0.39145171642303467, |
|
"logits/real": -0.5640333294868469, |
|
"logps/generated": -1174.184326171875, |
|
"logps/real": -156.5506591796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.92201232910156, |
|
"rewards/margins": 76.40318298339844, |
|
"rewards/real": -0.5188380479812622, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1481042654028436e-07, |
|
"logits/generated": -0.4219874441623688, |
|
"logits/real": -0.6748846769332886, |
|
"logps/generated": -1189.40771484375, |
|
"logps/real": -150.1600799560547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.53298950195312, |
|
"rewards/margins": 76.97785949707031, |
|
"rewards/real": -0.5551234483718872, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.136255924170616e-07, |
|
"logits/generated": -0.40171951055526733, |
|
"logits/real": -0.5924757122993469, |
|
"logps/generated": -1085.5384521484375, |
|
"logps/real": -151.51275634765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.87525177001953, |
|
"rewards/margins": 69.43013763427734, |
|
"rewards/real": -0.4451100826263428, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1244075829383886e-07, |
|
"logits/generated": -0.46468549966812134, |
|
"logits/real": -0.6485167145729065, |
|
"logps/generated": -1184.935546875, |
|
"logps/real": -141.2152099609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.06682586669922, |
|
"rewards/margins": 76.591064453125, |
|
"rewards/real": -0.47575807571411133, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.112559241706161e-07, |
|
"logits/generated": -0.4084410071372986, |
|
"logits/real": -0.5791139602661133, |
|
"logps/generated": -1158.128173828125, |
|
"logps/real": -140.56553649902344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.33256530761719, |
|
"rewards/margins": 73.85154724121094, |
|
"rewards/real": -0.48102912306785583, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.1007109004739336e-07, |
|
"logits/generated": -0.3835846781730652, |
|
"logits/real": -0.6427528262138367, |
|
"logps/generated": -1111.537109375, |
|
"logps/real": -120.40766906738281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.03950500488281, |
|
"rewards/margins": 72.73836517333984, |
|
"rewards/real": -0.3011349141597748, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0888625592417061e-07, |
|
"logits/generated": -0.38520628213882446, |
|
"logits/real": -0.5476213693618774, |
|
"logps/generated": -1175.740966796875, |
|
"logps/real": -150.56874084472656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.73905944824219, |
|
"rewards/margins": 76.24748992919922, |
|
"rewards/real": -0.4915708601474762, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0770142180094787e-07, |
|
"logits/generated": -0.4786599576473236, |
|
"logits/real": -0.6190992593765259, |
|
"logps/generated": -1177.5657958984375, |
|
"logps/real": -171.1073455810547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.37752532958984, |
|
"rewards/margins": 74.78016662597656, |
|
"rewards/real": -0.5973631143569946, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0651658767772511e-07, |
|
"logits/generated": -0.4746522307395935, |
|
"logits/real": -0.6191864609718323, |
|
"logps/generated": -1131.2122802734375, |
|
"logps/real": -170.5677490234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.63467407226562, |
|
"rewards/margins": 70.15538024902344, |
|
"rewards/real": -0.4793027341365814, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0533175355450237e-07, |
|
"logits/generated": -0.4551950991153717, |
|
"logits/real": -0.6525193452835083, |
|
"logps/generated": -1122.6358642578125, |
|
"logps/real": -159.02659606933594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.77278137207031, |
|
"rewards/margins": 70.19547271728516, |
|
"rewards/real": -0.5773108005523682, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0414691943127962e-07, |
|
"logits/generated": -0.4288361668586731, |
|
"logits/real": -0.6204260587692261, |
|
"logps/generated": -1192.144775390625, |
|
"logps/real": -163.85079956054688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.51756286621094, |
|
"rewards/margins": 74.76011657714844, |
|
"rewards/real": -0.7574476003646851, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0296208530805687e-07, |
|
"logits/generated": -0.4227616786956787, |
|
"logits/real": -0.5756683945655823, |
|
"logps/generated": -1093.92529296875, |
|
"logps/real": -168.95413208007812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.02093505859375, |
|
"rewards/margins": 66.26923370361328, |
|
"rewards/real": -0.7517085671424866, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0177725118483411e-07, |
|
"logits/generated": -0.400717556476593, |
|
"logits/real": -0.5438157320022583, |
|
"logps/generated": -1066.676025390625, |
|
"logps/real": -157.6385040283203, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -66.5630874633789, |
|
"rewards/margins": 65.72784423828125, |
|
"rewards/real": -0.8352400064468384, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0059241706161137e-07, |
|
"logits/generated": -0.394951194524765, |
|
"logits/real": -0.5915923118591309, |
|
"logps/generated": -1111.5635986328125, |
|
"logps/real": -151.64712524414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.04698181152344, |
|
"rewards/margins": 70.56211853027344, |
|
"rewards/real": -0.4848620295524597, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.940758293838862e-08, |
|
"logits/generated": -0.3858771026134491, |
|
"logits/real": -0.6399182081222534, |
|
"logps/generated": -1137.333740234375, |
|
"logps/real": -133.06719970703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.79722595214844, |
|
"rewards/margins": 71.40068054199219, |
|
"rewards/real": -0.3965340256690979, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.822274881516588e-08, |
|
"logits/generated": -0.40112823247909546, |
|
"logits/real": -0.6340306401252747, |
|
"logps/generated": -1152.3155517578125, |
|
"logps/real": -146.1147918701172, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.04869079589844, |
|
"rewards/margins": 74.36548614501953, |
|
"rewards/real": -0.6832191944122314, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.703791469194312e-08, |
|
"logits/generated": -0.3981134295463562, |
|
"logits/real": -0.615269660949707, |
|
"logps/generated": -1170.2542724609375, |
|
"logps/real": -125.6851577758789, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.76252746582031, |
|
"rewards/margins": 77.17864990234375, |
|
"rewards/real": -0.5838753581047058, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.585308056872038e-08, |
|
"logits/generated": -0.44058480858802795, |
|
"logits/real": -0.5800412893295288, |
|
"logps/generated": -1036.949951171875, |
|
"logps/real": -154.7644500732422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -65.33032989501953, |
|
"rewards/margins": 64.915283203125, |
|
"rewards/real": -0.41504526138305664, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.466824644549763e-08, |
|
"logits/generated": -0.4272391200065613, |
|
"logits/real": -0.5816367864608765, |
|
"logps/generated": -1235.927734375, |
|
"logps/real": -135.92108154296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -83.16920471191406, |
|
"rewards/margins": 82.63643646240234, |
|
"rewards/real": -0.5327636003494263, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.348341232227488e-08, |
|
"logits/generated": -0.45391201972961426, |
|
"logits/real": -0.5931220054626465, |
|
"logps/generated": -1152.2362060546875, |
|
"logps/real": -149.9822540283203, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.86837768554688, |
|
"rewards/margins": 74.22323608398438, |
|
"rewards/real": -0.6451278924942017, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.229857819905212e-08, |
|
"logits/generated": -0.42041435837745667, |
|
"logits/real": -0.5749475359916687, |
|
"logps/generated": -1185.0155029296875, |
|
"logps/real": -165.84779357910156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.07542419433594, |
|
"rewards/margins": 75.43386840820312, |
|
"rewards/real": -0.6415479183197021, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.111374407582938e-08, |
|
"logits/generated": -0.3552473187446594, |
|
"logits/real": -0.5435065031051636, |
|
"logps/generated": -1080.6820068359375, |
|
"logps/real": -149.6726837158203, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.79393005371094, |
|
"rewards/margins": 67.18496704101562, |
|
"rewards/real": -0.6089592576026917, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.992890995260663e-08, |
|
"logits/generated": -0.36526188254356384, |
|
"logits/real": -0.542805552482605, |
|
"logps/generated": -1186.941650390625, |
|
"logps/real": -147.6852264404297, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.81428527832031, |
|
"rewards/margins": 76.37767791748047, |
|
"rewards/real": -0.4366043210029602, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.874407582938389e-08, |
|
"logits/generated": -0.38974112272262573, |
|
"logits/real": -0.6490163207054138, |
|
"logps/generated": -1125.834716796875, |
|
"logps/real": -153.83132934570312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.05818176269531, |
|
"rewards/margins": 69.58064270019531, |
|
"rewards/real": -0.4775339961051941, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.755924170616114e-08, |
|
"logits/generated": -0.38700738549232483, |
|
"logits/real": -0.5742595195770264, |
|
"logps/generated": -1097.8369140625, |
|
"logps/real": -169.0182342529297, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.09275817871094, |
|
"rewards/margins": 68.44459533691406, |
|
"rewards/real": -0.648157000541687, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.63744075829384e-08, |
|
"logits/generated": -0.3959638476371765, |
|
"logits/real": -0.6299315690994263, |
|
"logps/generated": -1148.005859375, |
|
"logps/real": -154.40768432617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.98236846923828, |
|
"rewards/margins": 74.33345794677734, |
|
"rewards/real": -0.6489164233207703, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.518957345971564e-08, |
|
"logits/generated": -0.3766046166419983, |
|
"logits/real": -0.5962399244308472, |
|
"logps/generated": -1100.2664794921875, |
|
"logps/real": -153.56520080566406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.08140563964844, |
|
"rewards/margins": 68.45283508300781, |
|
"rewards/real": -0.6285830736160278, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.40047393364929e-08, |
|
"logits/generated": -0.41984719038009644, |
|
"logits/real": -0.5362011790275574, |
|
"logps/generated": -1218.0125732421875, |
|
"logps/real": -143.78208923339844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -80.89471435546875, |
|
"rewards/margins": 80.50052642822266, |
|
"rewards/real": -0.3941938281059265, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.281990521327013e-08, |
|
"logits/generated": -0.38275301456451416, |
|
"logits/real": -0.6942519545555115, |
|
"logps/generated": -1104.48828125, |
|
"logps/real": -130.95724487304688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.27180480957031, |
|
"rewards/margins": 68.72615051269531, |
|
"rewards/real": -0.5456700921058655, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.163507109004738e-08, |
|
"logits/generated": -0.369687020778656, |
|
"logits/real": -0.594490110874176, |
|
"logps/generated": -1186.750732421875, |
|
"logps/real": -140.19932556152344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.59791564941406, |
|
"rewards/margins": 76.15618133544922, |
|
"rewards/real": -0.44173464179039, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.045023696682464e-08, |
|
"logits/generated": -0.4018821120262146, |
|
"logits/real": -0.6110813617706299, |
|
"logps/generated": -1141.607177734375, |
|
"logps/real": -150.42648315429688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.14505767822266, |
|
"rewards/margins": 72.56416320800781, |
|
"rewards/real": -0.5808922052383423, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.926540284360189e-08, |
|
"logits/generated": -0.3948236405849457, |
|
"logits/real": -0.6121063232421875, |
|
"logps/generated": -1248.185302734375, |
|
"logps/real": -166.86651611328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -84.28129577636719, |
|
"rewards/margins": 83.68611907958984, |
|
"rewards/real": -0.5951663851737976, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.808056872037915e-08, |
|
"logits/generated": -0.40597429871559143, |
|
"logits/real": -0.6436026692390442, |
|
"logps/generated": -1255.7674560546875, |
|
"logps/real": -145.66281127929688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -84.02607727050781, |
|
"rewards/margins": 83.40240478515625, |
|
"rewards/real": -0.6236714124679565, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.689573459715639e-08, |
|
"logits/generated": -0.42012372612953186, |
|
"logits/real": -0.5629431009292603, |
|
"logps/generated": -1173.372314453125, |
|
"logps/real": -168.88525390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -79.46331024169922, |
|
"rewards/margins": 78.79911804199219, |
|
"rewards/real": -0.6641994714736938, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.571090047393365e-08, |
|
"logits/generated": -0.41295844316482544, |
|
"logits/real": -0.6122807860374451, |
|
"logps/generated": -1108.602783203125, |
|
"logps/real": -137.6436767578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.95465850830078, |
|
"rewards/margins": 70.28472900390625, |
|
"rewards/real": -0.6699261665344238, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.45260663507109e-08, |
|
"logits/generated": -0.4057750105857849, |
|
"logits/real": -0.6003803610801697, |
|
"logps/generated": -1285.003662109375, |
|
"logps/real": -139.14450073242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -89.37120056152344, |
|
"rewards/margins": 88.8534927368164, |
|
"rewards/real": -0.5177055597305298, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.334123222748814e-08, |
|
"logits/generated": -0.3600274324417114, |
|
"logits/real": -0.5848634243011475, |
|
"logps/generated": -1176.391357421875, |
|
"logps/real": -151.9239959716797, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.63912200927734, |
|
"rewards/margins": 76.92236328125, |
|
"rewards/real": -0.716761589050293, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.215639810426539e-08, |
|
"logits/generated": -0.40445417165756226, |
|
"logits/real": -0.6599612832069397, |
|
"logps/generated": -1055.158203125, |
|
"logps/real": -162.99879455566406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.7494888305664, |
|
"rewards/margins": 66.99449157714844, |
|
"rewards/real": -0.7549879550933838, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.097156398104265e-08, |
|
"logits/generated": -0.368557870388031, |
|
"logits/real": -0.47544288635253906, |
|
"logps/generated": -1161.69970703125, |
|
"logps/real": -145.6846923828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.03729248046875, |
|
"rewards/margins": 74.46896362304688, |
|
"rewards/real": -0.5683245062828064, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.97867298578199e-08, |
|
"logits/generated": -0.4004407823085785, |
|
"logits/real": -0.5904231667518616, |
|
"logps/generated": -1147.255126953125, |
|
"logps/real": -144.21981811523438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.1914291381836, |
|
"rewards/margins": 73.42829895019531, |
|
"rewards/real": -0.7631380558013916, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.860189573459716e-08, |
|
"logits/generated": -0.42303165793418884, |
|
"logits/real": -0.6458145380020142, |
|
"logps/generated": -1187.7353515625, |
|
"logps/real": -157.00314331054688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.56205749511719, |
|
"rewards/margins": 76.46504211425781, |
|
"rewards/real": -1.0970159769058228, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.74170616113744e-08, |
|
"logits/generated": -0.430379718542099, |
|
"logits/real": -0.595691442489624, |
|
"logps/generated": -1177.614990234375, |
|
"logps/real": -149.47544860839844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -78.75337219238281, |
|
"rewards/margins": 78.08720397949219, |
|
"rewards/real": -0.666162371635437, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.623222748815166e-08, |
|
"logits/generated": -0.3575670123100281, |
|
"logits/real": -0.5881283283233643, |
|
"logps/generated": -1064.1312255859375, |
|
"logps/real": -139.80990600585938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -68.53370666503906, |
|
"rewards/margins": 67.87602996826172, |
|
"rewards/real": -0.6576740741729736, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.504739336492891e-08, |
|
"logits/generated": -0.37114548683166504, |
|
"logits/real": -0.4665864408016205, |
|
"logps/generated": -1096.2255859375, |
|
"logps/real": -132.2154083251953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.34225463867188, |
|
"rewards/margins": 70.71492004394531, |
|
"rewards/real": -0.6273313164710999, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.386255924170615e-08, |
|
"logits/generated": -0.36751076579093933, |
|
"logits/real": -0.5819805264472961, |
|
"logps/generated": -1186.3887939453125, |
|
"logps/real": -146.05813598632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -78.0079116821289, |
|
"rewards/margins": 77.3697280883789, |
|
"rewards/real": -0.6381850838661194, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.26777251184834e-08, |
|
"logits/generated": -0.414253294467926, |
|
"logits/real": -0.6853745579719543, |
|
"logps/generated": -1162.65234375, |
|
"logps/real": -144.77853393554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.9441146850586, |
|
"rewards/margins": 74.01307678222656, |
|
"rewards/real": -0.931043267250061, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.149289099526066e-08, |
|
"logits/generated": -0.42148175835609436, |
|
"logits/real": -0.6784361600875854, |
|
"logps/generated": -1106.80078125, |
|
"logps/real": -149.71484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.97175598144531, |
|
"rewards/margins": 70.25888061523438, |
|
"rewards/real": -0.7128777503967285, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.030805687203791e-08, |
|
"logits/generated": -0.43376749753952026, |
|
"logits/real": -0.555932879447937, |
|
"logps/generated": -1215.3369140625, |
|
"logps/real": -154.15284729003906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -78.09486389160156, |
|
"rewards/margins": 77.44525146484375, |
|
"rewards/real": -0.6496086716651917, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.912322274881516e-08, |
|
"logits/generated": -0.4182409346103668, |
|
"logits/real": -0.5983696579933167, |
|
"logps/generated": -1234.3037109375, |
|
"logps/real": -168.16717529296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -84.26101684570312, |
|
"rewards/margins": 83.48126983642578, |
|
"rewards/real": -0.7797611951828003, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.793838862559241e-08, |
|
"logits/generated": -0.445539653301239, |
|
"logits/real": -0.670427680015564, |
|
"logps/generated": -1296.377685546875, |
|
"logps/real": -134.7132568359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -88.41688537597656, |
|
"rewards/margins": 87.923095703125, |
|
"rewards/real": -0.49379315972328186, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6753554502369666e-08, |
|
"logits/generated": -0.3767016530036926, |
|
"logits/real": -0.5589041709899902, |
|
"logps/generated": -1065.5745849609375, |
|
"logps/real": -145.93048095703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -64.40142059326172, |
|
"rewards/margins": 63.82404708862305, |
|
"rewards/real": -0.577372133731842, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.556872037914691e-08, |
|
"logits/generated": -0.41733822226524353, |
|
"logits/real": -0.6436376571655273, |
|
"logps/generated": -1053.798095703125, |
|
"logps/real": -165.2022247314453, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -65.1214828491211, |
|
"rewards/margins": 64.43299865722656, |
|
"rewards/real": -0.6884856820106506, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4383886255924165e-08, |
|
"logits/generated": -0.4003145098686218, |
|
"logits/real": -0.5307371020317078, |
|
"logps/generated": -1084.7730712890625, |
|
"logps/real": -168.169921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.28605651855469, |
|
"rewards/margins": 66.21138763427734, |
|
"rewards/real": -1.0746623277664185, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.319905213270142e-08, |
|
"logits/generated": -0.4704248011112213, |
|
"logits/real": -0.6616953611373901, |
|
"logps/generated": -1222.5751953125, |
|
"logps/real": -148.44683837890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -79.704345703125, |
|
"rewards/margins": 79.09273529052734, |
|
"rewards/real": -0.6116179823875427, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.201421800947867e-08, |
|
"logits/generated": -0.4243236482143402, |
|
"logits/real": -0.6298291683197021, |
|
"logps/generated": -1416.729736328125, |
|
"logps/real": -142.0489959716797, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -100.81539154052734, |
|
"rewards/margins": 100.10444641113281, |
|
"rewards/real": -0.710952639579773, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.082938388625592e-08, |
|
"logits/generated": -0.4102560579776764, |
|
"logits/real": -0.5691145658493042, |
|
"logps/generated": -1115.2730712890625, |
|
"logps/real": -159.29409790039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -70.8335952758789, |
|
"rewards/margins": 70.12803649902344, |
|
"rewards/real": -0.7055586576461792, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.964454976303317e-08, |
|
"logits/generated": -0.4029023051261902, |
|
"logits/real": -0.6073136329650879, |
|
"logps/generated": -1238.347900390625, |
|
"logps/real": -143.02938842773438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -82.9701156616211, |
|
"rewards/margins": 82.27519226074219, |
|
"rewards/real": -0.6949158906936646, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.845971563981042e-08, |
|
"logits/generated": -0.42402610182762146, |
|
"logits/real": -0.6584951877593994, |
|
"logps/generated": -1118.9874267578125, |
|
"logps/real": -145.93138122558594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.63047790527344, |
|
"rewards/margins": 72.98558044433594, |
|
"rewards/real": -0.644890546798706, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.7274881516587676e-08, |
|
"logits/generated": -0.3934037685394287, |
|
"logits/real": -0.6283634901046753, |
|
"logps/generated": -1259.203369140625, |
|
"logps/real": -153.5712432861328, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -86.28514099121094, |
|
"rewards/margins": 85.61091613769531, |
|
"rewards/real": -0.6742227673530579, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.609004739336492e-08, |
|
"logits/generated": -0.4452149271965027, |
|
"logits/real": -0.562778115272522, |
|
"logps/generated": -1178.203369140625, |
|
"logps/real": -167.87570190429688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.86197662353516, |
|
"rewards/margins": 76.29110717773438, |
|
"rewards/real": -0.5708707571029663, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.4905213270142176e-08, |
|
"logits/generated": -0.46924668550491333, |
|
"logits/real": -0.5640957355499268, |
|
"logps/generated": -1178.0283203125, |
|
"logps/real": -152.62841796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.18916320800781, |
|
"rewards/margins": 74.46082305908203, |
|
"rewards/real": -0.7283350229263306, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.372037914691943e-08, |
|
"logits/generated": -0.4569918215274811, |
|
"logits/real": -0.6253639459609985, |
|
"logps/generated": -1141.238525390625, |
|
"logps/real": -159.03335571289062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.83621215820312, |
|
"rewards/margins": 72.23777770996094, |
|
"rewards/real": -0.5984372496604919, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.253554502369668e-08, |
|
"logits/generated": -0.3836295008659363, |
|
"logits/real": -0.5883413553237915, |
|
"logps/generated": -1028.1627197265625, |
|
"logps/real": -152.56336975097656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -62.914405822753906, |
|
"rewards/margins": 62.3019905090332, |
|
"rewards/real": -0.6124156713485718, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.135071090047393e-08, |
|
"logits/generated": -0.43586069345474243, |
|
"logits/real": -0.584862232208252, |
|
"logps/generated": -1256.212646484375, |
|
"logps/real": -182.5783233642578, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -84.49156951904297, |
|
"rewards/margins": 83.60160064697266, |
|
"rewards/real": -0.8899722099304199, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.016587677725118e-08, |
|
"logits/generated": -0.42249807715415955, |
|
"logits/real": -0.6765289902687073, |
|
"logps/generated": -1098.554443359375, |
|
"logps/real": -149.42665100097656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.22462463378906, |
|
"rewards/margins": 68.48421478271484, |
|
"rewards/real": -0.740403950214386, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.8981042654028434e-08, |
|
"logits/generated": -0.3722071051597595, |
|
"logits/real": -0.6433338522911072, |
|
"logps/generated": -1152.187255859375, |
|
"logps/real": -135.0558624267578, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.83736419677734, |
|
"rewards/margins": 71.99942016601562, |
|
"rewards/real": -0.8379424810409546, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.779620853080569e-08, |
|
"logits/generated": -0.4465080797672272, |
|
"logits/real": -0.6853441596031189, |
|
"logps/generated": -1219.987548828125, |
|
"logps/real": -149.74484252929688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -81.68904876708984, |
|
"rewards/margins": 81.03762817382812, |
|
"rewards/real": -0.6514285802841187, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.661137440758294e-08, |
|
"logits/generated": -0.39111563563346863, |
|
"logits/real": -0.62447589635849, |
|
"logps/generated": -1110.0286865234375, |
|
"logps/real": -130.0288543701172, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.36956787109375, |
|
"rewards/margins": 72.62205505371094, |
|
"rewards/real": -0.7475109696388245, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.5426540284360186e-08, |
|
"logits/generated": -0.3793131709098816, |
|
"logits/real": -0.6151835322380066, |
|
"logps/generated": -1089.3216552734375, |
|
"logps/real": -138.77415466308594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.62622833251953, |
|
"rewards/margins": 67.07869720458984, |
|
"rewards/real": -0.5475287437438965, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.424170616113744e-08, |
|
"logits/generated": -0.41481703519821167, |
|
"logits/real": -0.6037416458129883, |
|
"logps/generated": -1118.58349609375, |
|
"logps/real": -156.24639892578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.81546783447266, |
|
"rewards/margins": 71.1466293334961, |
|
"rewards/real": -0.6688372492790222, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.305687203791469e-08, |
|
"logits/generated": -0.38786306977272034, |
|
"logits/real": -0.6748972535133362, |
|
"logps/generated": -1132.7213134765625, |
|
"logps/real": -165.26429748535156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.00263977050781, |
|
"rewards/margins": 73.53857421875, |
|
"rewards/real": -0.4640537202358246, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.1872037914691945e-08, |
|
"logits/generated": -0.3895108699798584, |
|
"logits/real": -0.5759503245353699, |
|
"logps/generated": -1006.7896728515625, |
|
"logps/real": -157.9845733642578, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -62.23859786987305, |
|
"rewards/margins": 61.38972091674805, |
|
"rewards/real": -0.8488828539848328, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.068720379146919e-08, |
|
"logits/generated": -0.3578011691570282, |
|
"logits/real": -0.5436447262763977, |
|
"logps/generated": -1215.705078125, |
|
"logps/real": -147.42477416992188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -80.29849243164062, |
|
"rewards/margins": 79.59217071533203, |
|
"rewards/real": -0.7063143253326416, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9502369668246444e-08, |
|
"logits/generated": -0.41877445578575134, |
|
"logits/real": -0.5616232752799988, |
|
"logps/generated": -1106.929931640625, |
|
"logps/real": -175.37808227539062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.43008422851562, |
|
"rewards/margins": 70.78630065917969, |
|
"rewards/real": -0.6437833905220032, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.8317535545023697e-08, |
|
"logits/generated": -0.42272821068763733, |
|
"logits/real": -0.6309406161308289, |
|
"logps/generated": -1138.3070068359375, |
|
"logps/real": -144.12118530273438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.61137390136719, |
|
"rewards/margins": 72.8843994140625, |
|
"rewards/real": -0.7269810438156128, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7132701421800947e-08, |
|
"logits/generated": -0.37885454297065735, |
|
"logits/real": -0.6365025043487549, |
|
"logps/generated": -1212.714111328125, |
|
"logps/real": -144.41900634765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -78.62388610839844, |
|
"rewards/margins": 77.66529083251953, |
|
"rewards/real": -0.9586065411567688, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.59478672985782e-08, |
|
"logits/generated": -0.4420732855796814, |
|
"logits/real": -0.5323609113693237, |
|
"logps/generated": -1076.71533203125, |
|
"logps/real": -181.2960968017578, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -65.17323303222656, |
|
"rewards/margins": 64.45045471191406, |
|
"rewards/real": -0.7227771878242493, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.476303317535545e-08, |
|
"logits/generated": -0.38568204641342163, |
|
"logits/real": -0.5752898454666138, |
|
"logps/generated": -1061.8555908203125, |
|
"logps/real": -170.11355590820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -66.07365417480469, |
|
"rewards/margins": 65.37210845947266, |
|
"rewards/real": -0.7015471458435059, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3578199052132702e-08, |
|
"logits/generated": -0.4010697901248932, |
|
"logits/real": -0.5733628273010254, |
|
"logps/generated": -1116.184326171875, |
|
"logps/real": -161.65103149414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -71.5921630859375, |
|
"rewards/margins": 71.0265121459961, |
|
"rewards/real": -0.5656577944755554, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.239336492890995e-08, |
|
"logits/generated": -0.4115443229675293, |
|
"logits/real": -0.5424914360046387, |
|
"logps/generated": -1180.7628173828125, |
|
"logps/real": -185.47634887695312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.3253402709961, |
|
"rewards/margins": 76.40950012207031, |
|
"rewards/real": -0.9158375859260559, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.1208530805687202e-08, |
|
"logits/generated": -0.42339619994163513, |
|
"logits/real": -0.6375981569290161, |
|
"logps/generated": -1170.9703369140625, |
|
"logps/real": -131.23190307617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.03814697265625, |
|
"rewards/margins": 76.52554321289062, |
|
"rewards/real": -0.5126058459281921, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.002369668246445e-08, |
|
"logits/generated": -0.42008423805236816, |
|
"logits/real": -0.6603757739067078, |
|
"logps/generated": -1106.7174072265625, |
|
"logps/real": -132.1379852294922, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.35577392578125, |
|
"rewards/margins": 68.54685974121094, |
|
"rewards/real": -0.8089267611503601, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8838862559241704e-08, |
|
"logits/generated": -0.4159209132194519, |
|
"logits/real": -0.6070636510848999, |
|
"logps/generated": -1108.8184814453125, |
|
"logps/real": -174.39251708984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.12606811523438, |
|
"rewards/margins": 68.53681945800781, |
|
"rewards/real": -0.5892479419708252, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7654028436018954e-08, |
|
"logits/generated": -0.41173315048217773, |
|
"logits/real": -0.48643770813941956, |
|
"logps/generated": -1070.396728515625, |
|
"logps/real": -191.44134521484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -65.99903869628906, |
|
"rewards/margins": 65.2906265258789, |
|
"rewards/real": -0.7084180116653442, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6469194312796207e-08, |
|
"logits/generated": -0.3698303997516632, |
|
"logits/real": -0.6320183277130127, |
|
"logps/generated": -1076.8929443359375, |
|
"logps/real": -139.63314819335938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -66.975830078125, |
|
"rewards/margins": 66.27364349365234, |
|
"rewards/real": -0.7021877765655518, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.528436018957346e-08, |
|
"logits/generated": -0.3886292278766632, |
|
"logits/real": -0.615139365196228, |
|
"logps/generated": -1140.2864990234375, |
|
"logps/real": -145.85720825195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.76911163330078, |
|
"rewards/margins": 74.237060546875, |
|
"rewards/real": -0.5320545434951782, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.409952606635071e-08, |
|
"logits/generated": -0.4748724400997162, |
|
"logits/real": -0.6143825650215149, |
|
"logps/generated": -1171.03076171875, |
|
"logps/real": -139.5336151123047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.67819213867188, |
|
"rewards/margins": 72.13270568847656, |
|
"rewards/real": -0.5454872846603394, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2914691943127961e-08, |
|
"logits/generated": -0.39133062958717346, |
|
"logits/real": -0.6907501816749573, |
|
"logps/generated": -1139.4451904296875, |
|
"logps/real": -122.94720458984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -73.89988708496094, |
|
"rewards/margins": 73.2684097290039, |
|
"rewards/real": -0.6314736604690552, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1729857819905212e-08, |
|
"logits/generated": -0.45327988266944885, |
|
"logits/real": -0.6570634245872498, |
|
"logps/generated": -1196.7115478515625, |
|
"logps/real": -150.64051818847656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.87831115722656, |
|
"rewards/margins": 77.22245788574219, |
|
"rewards/real": -0.6558529138565063, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0545023696682464e-08, |
|
"logits/generated": -0.43308648467063904, |
|
"logits/real": -0.6817704439163208, |
|
"logps/generated": -1163.740966796875, |
|
"logps/real": -139.56373596191406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.80686950683594, |
|
"rewards/margins": 76.22025299072266, |
|
"rewards/real": -0.5866076350212097, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.360189573459715e-09, |
|
"logits/generated": -0.38277140259742737, |
|
"logits/real": -0.6559278964996338, |
|
"logps/generated": -1152.90625, |
|
"logps/real": -166.77056884765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.07879638671875, |
|
"rewards/margins": 73.38710021972656, |
|
"rewards/real": -0.6916946172714233, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.175355450236966e-09, |
|
"logits/generated": -0.44269418716430664, |
|
"logits/real": -0.6459895968437195, |
|
"logps/generated": -1183.922607421875, |
|
"logps/real": -158.46505737304688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.86439514160156, |
|
"rewards/margins": 77.26386260986328, |
|
"rewards/real": -0.6005492806434631, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.990521327014218e-09, |
|
"logits/generated": -0.42995685338974, |
|
"logits/real": -0.6797999739646912, |
|
"logps/generated": -1169.283447265625, |
|
"logps/real": -131.04364013671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.65510559082031, |
|
"rewards/margins": 75.0411376953125, |
|
"rewards/real": -0.6139676570892334, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.805687203791469e-09, |
|
"logits/generated": -0.4121777415275574, |
|
"logits/real": -0.6116447448730469, |
|
"logps/generated": -1070.22314453125, |
|
"logps/real": -140.3560333251953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -66.28459167480469, |
|
"rewards/margins": 65.62239074707031, |
|
"rewards/real": -0.6622053384780884, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.62085308056872e-09, |
|
"logits/generated": -0.4175810217857361, |
|
"logits/real": -0.5604298114776611, |
|
"logps/generated": -1161.98583984375, |
|
"logps/real": -162.33438110351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -74.65108489990234, |
|
"rewards/margins": 74.18379211425781, |
|
"rewards/real": -0.4672994017601013, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.4360189573459714e-09, |
|
"logits/generated": -0.3966117799282074, |
|
"logits/real": -0.5953450202941895, |
|
"logps/generated": -1079.524169921875, |
|
"logps/real": -167.3221435546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -69.10150909423828, |
|
"rewards/margins": 68.49336242675781, |
|
"rewards/real": -0.6081460118293762, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.2511848341232227e-09, |
|
"logits/generated": -0.4167659282684326, |
|
"logits/real": -0.6169396042823792, |
|
"logps/generated": -1015.7071533203125, |
|
"logps/real": -176.66506958007812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -61.38505935668945, |
|
"rewards/margins": 60.835716247558594, |
|
"rewards/real": -0.5493378043174744, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.0663507109004738e-09, |
|
"logits/generated": -0.3958420157432556, |
|
"logits/real": -0.5675554275512695, |
|
"logps/generated": -1055.50341796875, |
|
"logps/real": -149.56039428710938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -66.84716796875, |
|
"rewards/margins": 66.41246032714844, |
|
"rewards/real": -0.4347153604030609, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4689, |
|
"total_flos": 0.0, |
|
"train_loss": 0.010306433322205334, |
|
"train_runtime": 33629.5111, |
|
"train_samples_per_second": 4.46, |
|
"train_steps_per_second": 0.139 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4689, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|