diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6596 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 100, + "global_step": 4689, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.066098081023454e-09, + "logits/generated": -0.6986645460128784, + "logits/real": -0.9474660754203796, + "logps/generated": -378.9501953125, + "logps/real": -127.2445068359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 1.0660980810234541e-08, + "logits/generated": -0.7292745113372803, + "logits/real": -0.9080196022987366, + "logps/generated": -411.4975280761719, + "logps/real": -136.8819122314453, + "loss": 0.6994, + "rewards/accuracies": 0.4305555522441864, + "rewards/generated": -0.00470572616904974, + "rewards/margins": 0.0034854437690228224, + "rewards/real": -0.0012202821671962738, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 2.1321961620469082e-08, + "logits/generated": -0.7172996997833252, + "logits/real": -0.8902201652526855, + "logps/generated": -425.13238525390625, + "logps/real": -146.6293182373047, + "loss": 0.6822, + "rewards/accuracies": 0.5625, + "rewards/generated": -0.04359797015786171, + "rewards/margins": 0.047350525856018066, + "rewards/real": 0.003752560820430517, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 3.1982942430703625e-08, + "logits/generated": -0.7285000681877136, + "logits/real": -0.9076566696166992, + "logps/generated": -390.41241455078125, + "logps/real": -124.2341537475586, + "loss": 0.6261, + "rewards/accuracies": 0.875, + "rewards/generated": -0.1323629468679428, + "rewards/margins": 0.12934455275535583, + "rewards/real": -0.003018400864675641, + "step": 30 + }, + { + "epoch": 0.03, + "learning_rate": 4.2643923240938164e-08, + "logits/generated": -0.7392301559448242, + "logits/real": -0.8854039311408997, + "logps/generated": -411.90313720703125, + "logps/real": -156.40731811523438, + "loss": 0.4991, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -0.507174551486969, + "rewards/margins": 0.4850993752479553, + "rewards/real": -0.022075189277529716, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 5.3304904051172704e-08, + "logits/generated": -0.7073003053665161, + "logits/real": -0.8917710185050964, + "logps/generated": -438.045654296875, + "logps/real": -150.26516723632812, + "loss": 0.3997, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -0.9068630337715149, + "rewards/margins": 0.8774474263191223, + "rewards/real": -0.029415583238005638, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 6.396588486140725e-08, + "logits/generated": -0.683810830116272, + "logits/real": -0.8957662582397461, + "logps/generated": -449.07598876953125, + "logps/real": -147.19796752929688, + "loss": 0.222, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -1.9475853443145752, + "rewards/margins": 1.8720191717147827, + "rewards/real": -0.07556610554456711, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 7.462686567164178e-08, + "logits/generated": -0.6989277601242065, + "logits/real": -0.826370894908905, + "logps/generated": -423.2393493652344, + "logps/real": -161.0751190185547, + "loss": 0.1887, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -2.234829902648926, + "rewards/margins": 2.123378038406372, + "rewards/real": -0.1114521399140358, + "step": 70 + }, + { + "epoch": 0.05, + "learning_rate": 8.528784648187633e-08, + "logits/generated": -0.7299633622169495, + "logits/real": -0.8775332570075989, + "logps/generated": -423.95709228515625, + "logps/real": -147.74581909179688, + "loss": 0.1788, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -2.318953037261963, + "rewards/margins": 2.218860149383545, + "rewards/real": -0.10009302943944931, + "step": 80 + }, + { + "epoch": 0.06, + "learning_rate": 9.594882729211087e-08, + "logits/generated": -0.7246233224868774, + "logits/real": -0.9104539752006531, + "logps/generated": -413.73638916015625, + "logps/real": -143.83090209960938, + "loss": 0.1662, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -2.487647533416748, + "rewards/margins": 2.347053289413452, + "rewards/real": -0.1405942738056183, + "step": 90 + }, + { + "epoch": 0.06, + "learning_rate": 1.0660980810234541e-07, + "logits/generated": -0.7521112561225891, + "logits/real": -0.8818603754043579, + "logps/generated": -452.70709228515625, + "logps/real": -162.36923217773438, + "loss": 0.1114, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -3.276683807373047, + "rewards/margins": 3.1224722862243652, + "rewards/real": -0.15421171486377716, + "step": 100 + }, + { + "epoch": 0.07, + "learning_rate": 1.1727078891257995e-07, + "logits/generated": -0.7396367788314819, + "logits/real": -0.8340644836425781, + "logps/generated": -441.98297119140625, + "logps/real": -156.94149780273438, + "loss": 0.1072, + "rewards/accuracies": 1.0, + "rewards/generated": -3.906806230545044, + "rewards/margins": 3.7491652965545654, + "rewards/real": -0.15764120221138, + "step": 110 + }, + { + "epoch": 0.08, + "learning_rate": 1.279317697228145e-07, + "logits/generated": -0.7582974433898926, + "logits/real": -0.9137656092643738, + "logps/generated": -457.762451171875, + "logps/real": -158.60690307617188, + "loss": 0.0746, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -4.984735012054443, + "rewards/margins": 4.785731315612793, + "rewards/real": -0.19900405406951904, + "step": 120 + }, + { + "epoch": 0.08, + "learning_rate": 1.3859275053304903e-07, + "logits/generated": -0.7004902958869934, + "logits/real": -0.8594983220100403, + "logps/generated": -452.61187744140625, + "logps/real": -145.72874450683594, + "loss": 0.0816, + "rewards/accuracies": 1.0, + "rewards/generated": -5.79620361328125, + "rewards/margins": 5.550940036773682, + "rewards/real": -0.24526312947273254, + "step": 130 + }, + { + "epoch": 0.09, + "learning_rate": 1.4925373134328355e-07, + "logits/generated": -0.7274501919746399, + "logits/real": -0.9405637979507446, + "logps/generated": -462.9717712402344, + "logps/real": -147.3827667236328, + "loss": 0.0653, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -6.3193254470825195, + "rewards/margins": 6.078797340393066, + "rewards/real": -0.2405281811952591, + "step": 140 + }, + { + "epoch": 0.1, + "learning_rate": 1.5991471215351813e-07, + "logits/generated": -0.7242128849029541, + "logits/real": -0.9140293002128601, + "logps/generated": -474.15216064453125, + "logps/real": -157.3037567138672, + "loss": 0.0441, + "rewards/accuracies": 1.0, + "rewards/generated": -6.953827857971191, + "rewards/margins": 6.571684837341309, + "rewards/real": -0.38214248418807983, + "step": 150 + }, + { + "epoch": 0.1, + "learning_rate": 1.7057569296375266e-07, + "logits/generated": -0.7147258520126343, + "logits/real": -0.8815923929214478, + "logps/generated": -457.26519775390625, + "logps/real": -142.1988067626953, + "loss": 0.044, + "rewards/accuracies": 1.0, + "rewards/generated": -6.94360876083374, + "rewards/margins": 6.643794059753418, + "rewards/real": -0.29981470108032227, + "step": 160 + }, + { + "epoch": 0.11, + "learning_rate": 1.8123667377398718e-07, + "logits/generated": -0.7313283085823059, + "logits/real": -0.8958312273025513, + "logps/generated": -466.23126220703125, + "logps/real": -156.481201171875, + "loss": 0.0421, + "rewards/accuracies": 1.0, + "rewards/generated": -7.367417812347412, + "rewards/margins": 7.1352057456970215, + "rewards/real": -0.23221150040626526, + "step": 170 + }, + { + "epoch": 0.12, + "learning_rate": 1.9189765458422174e-07, + "logits/generated": -0.7426391839981079, + "logits/real": -0.8894122838973999, + "logps/generated": -474.2499084472656, + "logps/real": -148.93968200683594, + "loss": 0.045, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -7.244542121887207, + "rewards/margins": 6.9434709548950195, + "rewards/real": -0.30106985569000244, + "step": 180 + }, + { + "epoch": 0.12, + "learning_rate": 2.025586353944563e-07, + "logits/generated": -0.7155178785324097, + "logits/real": -0.8063043355941772, + "logps/generated": -488.8700256347656, + "logps/real": -178.33253479003906, + "loss": 0.0344, + "rewards/accuracies": 1.0, + "rewards/generated": -7.651026248931885, + "rewards/margins": 7.362033843994141, + "rewards/real": -0.28899192810058594, + "step": 190 + }, + { + "epoch": 0.13, + "learning_rate": 2.1321961620469082e-07, + "logits/generated": -0.7116974592208862, + "logits/real": -0.8972026705741882, + "logps/generated": -475.73089599609375, + "logps/real": -143.96710205078125, + "loss": 0.044, + "rewards/accuracies": 1.0, + "rewards/generated": -8.454703330993652, + "rewards/margins": 8.173591613769531, + "rewards/real": -0.28111228346824646, + "step": 200 + }, + { + "epoch": 0.13, + "learning_rate": 2.2388059701492537e-07, + "logits/generated": -0.6575301885604858, + "logits/real": -0.8355759382247925, + "logps/generated": -517.0919799804688, + "logps/real": -158.15267944335938, + "loss": 0.0261, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -8.714963912963867, + "rewards/margins": 8.384611129760742, + "rewards/real": -0.3303532600402832, + "step": 210 + }, + { + "epoch": 0.14, + "learning_rate": 2.345415778251599e-07, + "logits/generated": -0.6825748085975647, + "logits/real": -0.9428423643112183, + "logps/generated": -481.72381591796875, + "logps/real": -139.2545166015625, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/generated": -7.92175817489624, + "rewards/margins": 7.7295966148376465, + "rewards/real": -0.19216081500053406, + "step": 220 + }, + { + "epoch": 0.15, + "learning_rate": 2.452025586353944e-07, + "logits/generated": -0.7176781892776489, + "logits/real": -0.8804994821548462, + "logps/generated": -505.34423828125, + "logps/real": -167.8238983154297, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/generated": -9.55711555480957, + "rewards/margins": 9.215084075927734, + "rewards/real": -0.34203046560287476, + "step": 230 + }, + { + "epoch": 0.15, + "learning_rate": 2.55863539445629e-07, + "logits/generated": -0.7456918358802795, + "logits/real": -0.9094620943069458, + "logps/generated": -521.493408203125, + "logps/real": -168.6183319091797, + "loss": 0.0309, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -9.619193077087402, + "rewards/margins": 9.36630916595459, + "rewards/real": -0.2528838515281677, + "step": 240 + }, + { + "epoch": 0.16, + "learning_rate": 2.665245202558635e-07, + "logits/generated": -0.7071075439453125, + "logits/real": -0.9580531120300293, + "logps/generated": -492.2247009277344, + "logps/real": -122.14371490478516, + "loss": 0.0247, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -9.554067611694336, + "rewards/margins": 9.357695579528809, + "rewards/real": -0.19637097418308258, + "step": 250 + }, + { + "epoch": 0.17, + "learning_rate": 2.7718550106609805e-07, + "logits/generated": -0.6865926384925842, + "logits/real": -0.9246328473091125, + "logps/generated": -499.01458740234375, + "logps/real": -112.85029602050781, + "loss": 0.0302, + "rewards/accuracies": 1.0, + "rewards/generated": -9.010147094726562, + "rewards/margins": 8.914213180541992, + "rewards/real": -0.0959334522485733, + "step": 260 + }, + { + "epoch": 0.17, + "learning_rate": 2.878464818763326e-07, + "logits/generated": -0.7495471239089966, + "logits/real": -0.9227398037910461, + "logps/generated": -492.08404541015625, + "logps/real": -152.34420776367188, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/generated": -9.229729652404785, + "rewards/margins": 9.07396125793457, + "rewards/real": -0.15576975047588348, + "step": 270 + }, + { + "epoch": 0.18, + "learning_rate": 2.985074626865671e-07, + "logits/generated": -0.7224324941635132, + "logits/real": -0.9130092859268188, + "logps/generated": -490.20587158203125, + "logps/real": -147.02711486816406, + "loss": 0.0299, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -9.046536445617676, + "rewards/margins": 8.867794036865234, + "rewards/real": -0.17874157428741455, + "step": 280 + }, + { + "epoch": 0.19, + "learning_rate": 3.0916844349680174e-07, + "logits/generated": -0.6551756262779236, + "logits/real": -0.856887698173523, + "logps/generated": -494.3564453125, + "logps/real": -127.67320251464844, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/generated": -9.807455062866211, + "rewards/margins": 9.784029960632324, + "rewards/real": -0.02342619001865387, + "step": 290 + }, + { + "epoch": 0.19, + "learning_rate": 3.1982942430703626e-07, + "logits/generated": -0.7393316030502319, + "logits/real": -0.8498824238777161, + "logps/generated": -507.4498596191406, + "logps/real": -155.37051391601562, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/generated": -10.729459762573242, + "rewards/margins": 10.669529914855957, + "rewards/real": -0.05992986634373665, + "step": 300 + }, + { + "epoch": 0.2, + "learning_rate": 3.304904051172708e-07, + "logits/generated": -0.6794149875640869, + "logits/real": -0.8570221066474915, + "logps/generated": -516.3793334960938, + "logps/real": -147.1600799560547, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/generated": -11.062250137329102, + "rewards/margins": 10.987607955932617, + "rewards/real": -0.07464051991701126, + "step": 310 + }, + { + "epoch": 0.2, + "learning_rate": 3.411513859275053e-07, + "logits/generated": -0.7035683393478394, + "logits/real": -0.8863167762756348, + "logps/generated": -511.3915100097656, + "logps/real": -155.48513793945312, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/generated": -10.503643989562988, + "rewards/margins": 10.356426239013672, + "rewards/real": -0.1472179889678955, + "step": 320 + }, + { + "epoch": 0.21, + "learning_rate": 3.5181236673773984e-07, + "logits/generated": -0.6560925245285034, + "logits/real": -0.8542153239250183, + "logps/generated": -549.3557739257812, + "logps/real": -134.815185546875, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/generated": -13.663198471069336, + "rewards/margins": 13.555437088012695, + "rewards/real": -0.10776337236166, + "step": 330 + }, + { + "epoch": 0.22, + "learning_rate": 3.6247334754797437e-07, + "logits/generated": -0.6811632513999939, + "logits/real": -0.9368340373039246, + "logps/generated": -537.3572387695312, + "logps/real": -134.58053588867188, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/generated": -13.095390319824219, + "rewards/margins": 12.888254165649414, + "rewards/real": -0.20713606476783752, + "step": 340 + }, + { + "epoch": 0.22, + "learning_rate": 3.7313432835820895e-07, + "logits/generated": -0.7272433042526245, + "logits/real": -0.9455550909042358, + "logps/generated": -553.9634399414062, + "logps/real": -131.4666748046875, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/generated": -13.890289306640625, + "rewards/margins": 13.677050590515137, + "rewards/real": -0.21323621273040771, + "step": 350 + }, + { + "epoch": 0.23, + "learning_rate": 3.8379530916844347e-07, + "logits/generated": -0.7156924605369568, + "logits/real": -0.8684523701667786, + "logps/generated": -536.7255859375, + "logps/real": -173.733154296875, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/generated": -13.108613967895508, + "rewards/margins": 12.777644157409668, + "rewards/real": -0.33096957206726074, + "step": 360 + }, + { + "epoch": 0.24, + "learning_rate": 3.9445628997867805e-07, + "logits/generated": -0.6920727491378784, + "logits/real": -0.851031482219696, + "logps/generated": -580.4476318359375, + "logps/real": -153.07241821289062, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/generated": -14.45750617980957, + "rewards/margins": 14.292282104492188, + "rewards/real": -0.16522422432899475, + "step": 370 + }, + { + "epoch": 0.24, + "learning_rate": 4.051172707889126e-07, + "logits/generated": -0.7242365479469299, + "logits/real": -0.9248291254043579, + "logps/generated": -566.2769775390625, + "logps/real": -136.8095245361328, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/generated": -14.296048164367676, + "rewards/margins": 14.1116361618042, + "rewards/real": -0.18441154062747955, + "step": 380 + }, + { + "epoch": 0.25, + "learning_rate": 4.157782515991471e-07, + "logits/generated": -0.7522596120834351, + "logits/real": -0.9050644636154175, + "logps/generated": -568.7861328125, + "logps/real": -143.46664428710938, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/generated": -16.536224365234375, + "rewards/margins": 16.37632942199707, + "rewards/real": -0.15989510715007782, + "step": 390 + }, + { + "epoch": 0.26, + "learning_rate": 4.2643923240938163e-07, + "logits/generated": -0.7549287676811218, + "logits/real": -0.944291889667511, + "logps/generated": -558.4595947265625, + "logps/real": -138.44850158691406, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/generated": -16.336772918701172, + "rewards/margins": 16.19559669494629, + "rewards/real": -0.14117594063282013, + "step": 400 + }, + { + "epoch": 0.26, + "learning_rate": 4.371002132196162e-07, + "logits/generated": -0.7194818258285522, + "logits/real": -0.9151653051376343, + "logps/generated": -603.3551025390625, + "logps/real": -128.14102172851562, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/generated": -17.891334533691406, + "rewards/margins": 17.594852447509766, + "rewards/real": -0.2964830994606018, + "step": 410 + }, + { + "epoch": 0.27, + "learning_rate": 4.4776119402985074e-07, + "logits/generated": -0.7237090468406677, + "logits/real": -0.876343846321106, + "logps/generated": -554.85302734375, + "logps/real": -144.67776489257812, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/generated": -15.776510238647461, + "rewards/margins": 15.578184127807617, + "rewards/real": -0.19832463562488556, + "step": 420 + }, + { + "epoch": 0.28, + "learning_rate": 4.5842217484008526e-07, + "logits/generated": -0.6880273818969727, + "logits/real": -0.8874770402908325, + "logps/generated": -574.5953369140625, + "logps/real": -144.68075561523438, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/generated": -16.195022583007812, + "rewards/margins": 16.028963088989258, + "rewards/real": -0.16606178879737854, + "step": 430 + }, + { + "epoch": 0.28, + "learning_rate": 4.690831556503198e-07, + "logits/generated": -0.6841549873352051, + "logits/real": -0.8916375041007996, + "logps/generated": -601.9527587890625, + "logps/real": -154.18507385253906, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/generated": -17.76523208618164, + "rewards/margins": 17.5986270904541, + "rewards/real": -0.16660475730895996, + "step": 440 + }, + { + "epoch": 0.29, + "learning_rate": 4.797441364605543e-07, + "logits/generated": -0.6988117694854736, + "logits/real": -0.8081305623054504, + "logps/generated": -610.0335693359375, + "logps/real": -151.32000732421875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/generated": -19.002681732177734, + "rewards/margins": 18.782581329345703, + "rewards/real": -0.22010159492492676, + "step": 450 + }, + { + "epoch": 0.29, + "learning_rate": 4.904051172707888e-07, + "logits/generated": -0.696639895439148, + "logits/real": -0.9278604388237, + "logps/generated": -585.9586791992188, + "logps/real": -161.8017120361328, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/generated": -17.64035415649414, + "rewards/margins": 17.357894897460938, + "rewards/real": -0.282459557056427, + "step": 460 + }, + { + "epoch": 0.3, + "learning_rate": 4.998815165876776e-07, + "logits/generated": -0.6872554421424866, + "logits/real": -0.9127834439277649, + "logps/generated": -568.6585693359375, + "logps/real": -129.33038330078125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -17.379060745239258, + "rewards/margins": 17.275171279907227, + "rewards/real": -0.10388918966054916, + "step": 470 + }, + { + "epoch": 0.31, + "learning_rate": 4.98696682464455e-07, + "logits/generated": -0.699679970741272, + "logits/real": -0.8975842595100403, + "logps/generated": -584.9615478515625, + "logps/real": -152.40818786621094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/generated": -18.11277961730957, + "rewards/margins": 17.817615509033203, + "rewards/real": -0.2951619029045105, + "step": 480 + }, + { + "epoch": 0.31, + "learning_rate": 4.975118483412322e-07, + "logits/generated": -0.7286016941070557, + "logits/real": -0.8225492238998413, + "logps/generated": -618.4642333984375, + "logps/real": -168.58460998535156, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/generated": -19.85270118713379, + "rewards/margins": 19.50424575805664, + "rewards/real": -0.34845709800720215, + "step": 490 + }, + { + "epoch": 0.32, + "learning_rate": 4.963270142180094e-07, + "logits/generated": -0.7258303761482239, + "logits/real": -0.9152861833572388, + "logps/generated": -578.2322387695312, + "logps/real": -137.53619384765625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/generated": -19.372339248657227, + "rewards/margins": 19.179141998291016, + "rewards/real": -0.19319558143615723, + "step": 500 + }, + { + "epoch": 0.33, + "learning_rate": 4.951421800947867e-07, + "logits/generated": -0.7013474702835083, + "logits/real": -0.8657256960868835, + "logps/generated": -624.0083618164062, + "logps/real": -149.85691833496094, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/generated": -22.289051055908203, + "rewards/margins": 21.97234344482422, + "rewards/real": -0.3167068660259247, + "step": 510 + }, + { + "epoch": 0.33, + "learning_rate": 4.93957345971564e-07, + "logits/generated": -0.7635418772697449, + "logits/real": -0.868754506111145, + "logps/generated": -628.0731201171875, + "logps/real": -171.21641540527344, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -22.14756965637207, + "rewards/margins": 21.83392333984375, + "rewards/real": -0.31364530324935913, + "step": 520 + }, + { + "epoch": 0.34, + "learning_rate": 4.927725118483413e-07, + "logits/generated": -0.7472074031829834, + "logits/real": -0.9306868314743042, + "logps/generated": -664.8667602539062, + "logps/real": -160.69815063476562, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -25.677719116210938, + "rewards/margins": 25.49850082397461, + "rewards/real": -0.17921803891658783, + "step": 530 + }, + { + "epoch": 0.35, + "learning_rate": 4.915876777251184e-07, + "logits/generated": -0.6238476037979126, + "logits/real": -0.78472900390625, + "logps/generated": -606.1143798828125, + "logps/real": -158.46510314941406, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/generated": -20.29648208618164, + "rewards/margins": 19.908735275268555, + "rewards/real": -0.3877467215061188, + "step": 540 + }, + { + "epoch": 0.35, + "learning_rate": 4.904028436018957e-07, + "logits/generated": -0.6451541185379028, + "logits/real": -0.8735024333000183, + "logps/generated": -645.0818481445312, + "logps/real": -131.34632873535156, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -22.540191650390625, + "rewards/margins": 22.381816864013672, + "rewards/real": -0.15837618708610535, + "step": 550 + }, + { + "epoch": 0.36, + "learning_rate": 4.892180094786729e-07, + "logits/generated": -0.6812300682067871, + "logits/real": -0.8363407850265503, + "logps/generated": -600.5889282226562, + "logps/real": -161.74234008789062, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/generated": -21.389225006103516, + "rewards/margins": 21.255290985107422, + "rewards/real": -0.133933424949646, + "step": 560 + }, + { + "epoch": 0.36, + "learning_rate": 4.880331753554502e-07, + "logits/generated": -0.6616766452789307, + "logits/real": -0.8058059811592102, + "logps/generated": -581.693359375, + "logps/real": -169.41537475585938, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/generated": -19.568174362182617, + "rewards/margins": 19.344226837158203, + "rewards/real": -0.22394871711730957, + "step": 570 + }, + { + "epoch": 0.37, + "learning_rate": 4.868483412322275e-07, + "logits/generated": -0.6738962531089783, + "logits/real": -0.8422471880912781, + "logps/generated": -610.7338256835938, + "logps/real": -153.44923400878906, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/generated": -20.05523109436035, + "rewards/margins": 19.948062896728516, + "rewards/real": -0.1071687787771225, + "step": 580 + }, + { + "epoch": 0.38, + "learning_rate": 4.856635071090047e-07, + "logits/generated": -0.653414785861969, + "logits/real": -0.9212865829467773, + "logps/generated": -636.19677734375, + "logps/real": -136.12069702148438, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/generated": -23.247209548950195, + "rewards/margins": 23.25400161743164, + "rewards/real": 0.006791981868445873, + "step": 590 + }, + { + "epoch": 0.38, + "learning_rate": 4.84478672985782e-07, + "logits/generated": -0.652206301689148, + "logits/real": -0.8476254343986511, + "logps/generated": -619.7491455078125, + "logps/real": -142.06788635253906, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/generated": -21.76266860961914, + "rewards/margins": 21.641902923583984, + "rewards/real": -0.12076608836650848, + "step": 600 + }, + { + "epoch": 0.39, + "learning_rate": 4.832938388625591e-07, + "logits/generated": -0.6429646015167236, + "logits/real": -0.8978961706161499, + "logps/generated": -618.6793823242188, + "logps/real": -125.8365707397461, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -22.18993377685547, + "rewards/margins": 22.008413314819336, + "rewards/real": -0.18152059614658356, + "step": 610 + }, + { + "epoch": 0.4, + "learning_rate": 4.821090047393365e-07, + "logits/generated": -0.6130845546722412, + "logits/real": -0.8363273739814758, + "logps/generated": -597.8096923828125, + "logps/real": -160.01922607421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -20.191875457763672, + "rewards/margins": 19.777849197387695, + "rewards/real": -0.4140281081199646, + "step": 620 + }, + { + "epoch": 0.4, + "learning_rate": 4.809241706161137e-07, + "logits/generated": -0.618815541267395, + "logits/real": -0.7665129899978638, + "logps/generated": -627.1131591796875, + "logps/real": -141.24853515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -21.848682403564453, + "rewards/margins": 21.604549407958984, + "rewards/real": -0.24413225054740906, + "step": 630 + }, + { + "epoch": 0.41, + "learning_rate": 4.79739336492891e-07, + "logits/generated": -0.6469287872314453, + "logits/real": -0.7974787354469299, + "logps/generated": -646.8034057617188, + "logps/real": -140.12033081054688, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -24.727245330810547, + "rewards/margins": 24.623071670532227, + "rewards/real": -0.10417119413614273, + "step": 640 + }, + { + "epoch": 0.42, + "learning_rate": 4.785545023696682e-07, + "logits/generated": -0.6693117618560791, + "logits/real": -0.8058202862739563, + "logps/generated": -636.232421875, + "logps/real": -162.97914123535156, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/generated": -24.655866622924805, + "rewards/margins": 24.45601463317871, + "rewards/real": -0.19985152781009674, + "step": 650 + }, + { + "epoch": 0.42, + "learning_rate": 4.773696682464455e-07, + "logits/generated": -0.6164982914924622, + "logits/real": -0.7986790537834167, + "logps/generated": -607.1170654296875, + "logps/real": -139.31671142578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -20.806074142456055, + "rewards/margins": 20.69213104248047, + "rewards/real": -0.11394244432449341, + "step": 660 + }, + { + "epoch": 0.43, + "learning_rate": 4.7618483412322273e-07, + "logits/generated": -0.6816304922103882, + "logits/real": -0.7648627161979675, + "logps/generated": -647.4364013671875, + "logps/real": -150.38284301757812, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/generated": -25.9957275390625, + "rewards/margins": 25.87929344177246, + "rewards/real": -0.11643538624048233, + "step": 670 + }, + { + "epoch": 0.44, + "learning_rate": 4.7499999999999995e-07, + "logits/generated": -0.6420483589172363, + "logits/real": -0.8686118125915527, + "logps/generated": -686.556640625, + "logps/real": -170.68507385253906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -26.79473876953125, + "rewards/margins": 26.400888442993164, + "rewards/real": -0.3938508927822113, + "step": 680 + }, + { + "epoch": 0.44, + "learning_rate": 4.738151658767772e-07, + "logits/generated": -0.6229578852653503, + "logits/real": -0.7552638649940491, + "logps/generated": -617.7360229492188, + "logps/real": -133.21524047851562, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/generated": -23.75196075439453, + "rewards/margins": 23.369274139404297, + "rewards/real": -0.38268691301345825, + "step": 690 + }, + { + "epoch": 0.45, + "learning_rate": 4.726303317535545e-07, + "logits/generated": -0.6239826679229736, + "logits/real": -0.8113874197006226, + "logps/generated": -617.291748046875, + "logps/real": -171.48641967773438, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/generated": -21.23967742919922, + "rewards/margins": 20.85289764404297, + "rewards/real": -0.3867819309234619, + "step": 700 + }, + { + "epoch": 0.45, + "learning_rate": 4.7144549763033177e-07, + "logits/generated": -0.5856727361679077, + "logits/real": -0.7748730182647705, + "logps/generated": -646.7052612304688, + "logps/real": -193.67135620117188, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -23.783584594726562, + "rewards/margins": 23.435470581054688, + "rewards/real": -0.34811311960220337, + "step": 710 + }, + { + "epoch": 0.46, + "learning_rate": 4.70260663507109e-07, + "logits/generated": -0.6293947100639343, + "logits/real": -0.8080043792724609, + "logps/generated": -639.7860107421875, + "logps/real": -146.7048797607422, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -23.628271102905273, + "rewards/margins": 23.445405960083008, + "rewards/real": -0.1828646957874298, + "step": 720 + }, + { + "epoch": 0.47, + "learning_rate": 4.690758293838862e-07, + "logits/generated": -0.615898609161377, + "logits/real": -0.7723320722579956, + "logps/generated": -677.4393310546875, + "logps/real": -144.49502563476562, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -26.529178619384766, + "rewards/margins": 26.3712158203125, + "rewards/real": -0.1579606533050537, + "step": 730 + }, + { + "epoch": 0.47, + "learning_rate": 4.678909952606635e-07, + "logits/generated": -0.5974934697151184, + "logits/real": -0.7126566767692566, + "logps/generated": -625.4193115234375, + "logps/real": -168.69549560546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -21.015254974365234, + "rewards/margins": 20.59768295288086, + "rewards/real": -0.4175707697868347, + "step": 740 + }, + { + "epoch": 0.48, + "learning_rate": 4.667061611374407e-07, + "logits/generated": -0.5658475756645203, + "logits/real": -0.7219498157501221, + "logps/generated": -662.0862426757812, + "logps/real": -161.44467163085938, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -24.643749237060547, + "rewards/margins": 24.445858001708984, + "rewards/real": -0.1978892832994461, + "step": 750 + }, + { + "epoch": 0.49, + "learning_rate": 4.65521327014218e-07, + "logits/generated": -0.5992667078971863, + "logits/real": -0.834603488445282, + "logps/generated": -645.3001708984375, + "logps/real": -129.46719360351562, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/generated": -22.823612213134766, + "rewards/margins": 22.714466094970703, + "rewards/real": -0.10914424806833267, + "step": 760 + }, + { + "epoch": 0.49, + "learning_rate": 4.6433649289099525e-07, + "logits/generated": -0.6031894087791443, + "logits/real": -0.8013744354248047, + "logps/generated": -621.9285278320312, + "logps/real": -142.18630981445312, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -23.36435317993164, + "rewards/margins": 23.20974349975586, + "rewards/real": -0.15460748970508575, + "step": 770 + }, + { + "epoch": 0.5, + "learning_rate": 4.631516587677725e-07, + "logits/generated": -0.6474970579147339, + "logits/real": -0.7969701290130615, + "logps/generated": -695.6294555664062, + "logps/real": -168.54324340820312, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -27.67018699645996, + "rewards/margins": 27.486658096313477, + "rewards/real": -0.1835293024778366, + "step": 780 + }, + { + "epoch": 0.51, + "learning_rate": 4.6196682464454974e-07, + "logits/generated": -0.5945593118667603, + "logits/real": -0.8760132789611816, + "logps/generated": -632.8075561523438, + "logps/real": -137.9114532470703, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -24.00465965270996, + "rewards/margins": 23.872516632080078, + "rewards/real": -0.13214412331581116, + "step": 790 + }, + { + "epoch": 0.51, + "learning_rate": 4.60781990521327e-07, + "logits/generated": -0.6659427285194397, + "logits/real": -0.7805012464523315, + "logps/generated": -676.3375244140625, + "logps/real": -144.7049102783203, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -25.78857421875, + "rewards/margins": 25.562801361083984, + "rewards/real": -0.22577252984046936, + "step": 800 + }, + { + "epoch": 0.52, + "learning_rate": 4.5959715639810423e-07, + "logits/generated": -0.5976084470748901, + "logits/real": -0.7444257140159607, + "logps/generated": -604.505615234375, + "logps/real": -165.31021118164062, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/generated": -21.380346298217773, + "rewards/margins": 21.084182739257812, + "rewards/real": -0.2961658239364624, + "step": 810 + }, + { + "epoch": 0.52, + "learning_rate": 4.5841232227488145e-07, + "logits/generated": -0.6201892495155334, + "logits/real": -0.7714813351631165, + "logps/generated": -661.017822265625, + "logps/real": -157.14991760253906, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/generated": -25.09454345703125, + "rewards/margins": 24.870752334594727, + "rewards/real": -0.22379302978515625, + "step": 820 + }, + { + "epoch": 0.53, + "learning_rate": 4.5722748815165873e-07, + "logits/generated": -0.6445611715316772, + "logits/real": -0.8067296147346497, + "logps/generated": -723.9563598632812, + "logps/real": -160.5393829345703, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -30.98370361328125, + "rewards/margins": 30.44954490661621, + "rewards/real": -0.5341606736183167, + "step": 830 + }, + { + "epoch": 0.54, + "learning_rate": 4.56042654028436e-07, + "logits/generated": -0.6212409734725952, + "logits/real": -0.8002877235412598, + "logps/generated": -664.5400390625, + "logps/real": -154.18406677246094, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/generated": -26.307668685913086, + "rewards/margins": 25.93606185913086, + "rewards/real": -0.37160566449165344, + "step": 840 + }, + { + "epoch": 0.54, + "learning_rate": 4.5485781990521327e-07, + "logits/generated": -0.6696589589118958, + "logits/real": -0.872015655040741, + "logps/generated": -713.975830078125, + "logps/real": -128.1663055419922, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -29.322790145874023, + "rewards/margins": 28.88034439086914, + "rewards/real": -0.44244661927223206, + "step": 850 + }, + { + "epoch": 0.55, + "learning_rate": 4.536729857819905e-07, + "logits/generated": -0.6352511644363403, + "logits/real": -0.8168119192123413, + "logps/generated": -690.1238403320312, + "logps/real": -147.59390258789062, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -29.160791397094727, + "rewards/margins": 28.624774932861328, + "rewards/real": -0.5360159873962402, + "step": 860 + }, + { + "epoch": 0.56, + "learning_rate": 4.5248815165876776e-07, + "logits/generated": -0.6255658268928528, + "logits/real": -0.7953276634216309, + "logps/generated": -707.5949096679688, + "logps/real": -164.2091827392578, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -30.47397232055664, + "rewards/margins": 29.822368621826172, + "rewards/real": -0.6516034007072449, + "step": 870 + }, + { + "epoch": 0.56, + "learning_rate": 4.5130331753554504e-07, + "logits/generated": -0.6212276816368103, + "logits/real": -0.7597033381462097, + "logps/generated": -692.6005859375, + "logps/real": -188.38082885742188, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -30.789087295532227, + "rewards/margins": 30.043895721435547, + "rewards/real": -0.7451905608177185, + "step": 880 + }, + { + "epoch": 0.57, + "learning_rate": 4.5011848341232226e-07, + "logits/generated": -0.6456987261772156, + "logits/real": -0.8051185607910156, + "logps/generated": -692.8140258789062, + "logps/real": -150.5452880859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -30.668848037719727, + "rewards/margins": 30.199214935302734, + "rewards/real": -0.46962958574295044, + "step": 890 + }, + { + "epoch": 0.58, + "learning_rate": 4.489336492890995e-07, + "logits/generated": -0.6712831258773804, + "logits/real": -0.826252281665802, + "logps/generated": -746.2249145507812, + "logps/real": -135.2972412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -31.030811309814453, + "rewards/margins": 30.67293930053711, + "rewards/real": -0.3578687012195587, + "step": 900 + }, + { + "epoch": 0.58, + "learning_rate": 4.4774881516587675e-07, + "logits/generated": -0.6518301367759705, + "logits/real": -0.8644415736198425, + "logps/generated": -690.9013671875, + "logps/real": -161.36314392089844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -28.964740753173828, + "rewards/margins": 28.42722511291504, + "rewards/real": -0.5375159978866577, + "step": 910 + }, + { + "epoch": 0.59, + "learning_rate": 4.46563981042654e-07, + "logits/generated": -0.6160604953765869, + "logits/real": -0.8334490060806274, + "logps/generated": -717.3743286132812, + "logps/real": -132.34591674804688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -28.668527603149414, + "rewards/margins": 28.09389877319336, + "rewards/real": -0.5746307969093323, + "step": 920 + }, + { + "epoch": 0.6, + "learning_rate": 4.4537914691943124e-07, + "logits/generated": -0.6645776033401489, + "logits/real": -0.749662458896637, + "logps/generated": -705.51708984375, + "logps/real": -170.75979614257812, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -30.325271606445312, + "rewards/margins": 29.850238800048828, + "rewards/real": -0.4750315248966217, + "step": 930 + }, + { + "epoch": 0.6, + "learning_rate": 4.441943127962085e-07, + "logits/generated": -0.5848880410194397, + "logits/real": -0.7599430084228516, + "logps/generated": -679.7612915039062, + "logps/real": -162.4516143798828, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -28.77053451538086, + "rewards/margins": 28.397497177124023, + "rewards/real": -0.37303638458251953, + "step": 940 + }, + { + "epoch": 0.61, + "learning_rate": 4.430094786729858e-07, + "logits/generated": -0.6037660837173462, + "logits/real": -0.7843543887138367, + "logps/generated": -699.0863037109375, + "logps/real": -141.5878143310547, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -30.310083389282227, + "rewards/margins": 29.800827026367188, + "rewards/real": -0.5092543363571167, + "step": 950 + }, + { + "epoch": 0.61, + "learning_rate": 4.4182464454976306e-07, + "logits/generated": -0.6485855579376221, + "logits/real": -0.7831935882568359, + "logps/generated": -758.2669677734375, + "logps/real": -161.2501220703125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/generated": -35.414588928222656, + "rewards/margins": 34.92060852050781, + "rewards/real": -0.49398383498191833, + "step": 960 + }, + { + "epoch": 0.62, + "learning_rate": 4.4063981042654023e-07, + "logits/generated": -0.6142803430557251, + "logits/real": -0.8085862398147583, + "logps/generated": -758.5242919921875, + "logps/real": -143.28065490722656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -35.11386489868164, + "rewards/margins": 34.51411056518555, + "rewards/real": -0.5997532606124878, + "step": 970 + }, + { + "epoch": 0.63, + "learning_rate": 4.394549763033175e-07, + "logits/generated": -0.6144439578056335, + "logits/real": -0.7795756459236145, + "logps/generated": -712.9147338867188, + "logps/real": -147.69723510742188, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -32.72540283203125, + "rewards/margins": 32.39826202392578, + "rewards/real": -0.3271421492099762, + "step": 980 + }, + { + "epoch": 0.63, + "learning_rate": 4.382701421800948e-07, + "logits/generated": -0.6217916011810303, + "logits/real": -0.7831851840019226, + "logps/generated": -756.0094604492188, + "logps/real": -150.90347290039062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -36.669349670410156, + "rewards/margins": 36.12870788574219, + "rewards/real": -0.5406419634819031, + "step": 990 + }, + { + "epoch": 0.64, + "learning_rate": 4.37085308056872e-07, + "logits/generated": -0.5746406316757202, + "logits/real": -0.7443927526473999, + "logps/generated": -721.3018798828125, + "logps/real": -167.23497009277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -33.08510971069336, + "rewards/margins": 32.51539993286133, + "rewards/real": -0.5697122812271118, + "step": 1000 + }, + { + "epoch": 0.65, + "learning_rate": 4.3590047393364927e-07, + "logits/generated": -0.6204794645309448, + "logits/real": -0.8241082429885864, + "logps/generated": -771.1573486328125, + "logps/real": -142.06658935546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -36.35809326171875, + "rewards/margins": 35.951690673828125, + "rewards/real": -0.40640267729759216, + "step": 1010 + }, + { + "epoch": 0.65, + "learning_rate": 4.3471563981042654e-07, + "logits/generated": -0.6459885239601135, + "logits/real": -0.7648425698280334, + "logps/generated": -728.7994384765625, + "logps/real": -142.18283081054688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -35.09253692626953, + "rewards/margins": 34.53176498413086, + "rewards/real": -0.5607694387435913, + "step": 1020 + }, + { + "epoch": 0.66, + "learning_rate": 4.335308056872038e-07, + "logits/generated": -0.6198188066482544, + "logits/real": -0.8037668466567993, + "logps/generated": -784.0115966796875, + "logps/real": -148.4331817626953, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -36.82765579223633, + "rewards/margins": 36.30569839477539, + "rewards/real": -0.5219635367393494, + "step": 1030 + }, + { + "epoch": 0.67, + "learning_rate": 4.32345971563981e-07, + "logits/generated": -0.5650381445884705, + "logits/real": -0.7140682339668274, + "logps/generated": -789.3671875, + "logps/real": -162.1250762939453, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -37.974632263183594, + "rewards/margins": 37.51204299926758, + "rewards/real": -0.46258825063705444, + "step": 1040 + }, + { + "epoch": 0.67, + "learning_rate": 4.3116113744075825e-07, + "logits/generated": -0.5569009780883789, + "logits/real": -0.6691209077835083, + "logps/generated": -727.1697387695312, + "logps/real": -149.501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -32.12350082397461, + "rewards/margins": 31.786418914794922, + "rewards/real": -0.3370811939239502, + "step": 1050 + }, + { + "epoch": 0.68, + "learning_rate": 4.299763033175355e-07, + "logits/generated": -0.6008241772651672, + "logits/real": -0.7835357785224915, + "logps/generated": -766.7589111328125, + "logps/real": -131.9623260498047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -37.20771026611328, + "rewards/margins": 36.767494201660156, + "rewards/real": -0.4402230381965637, + "step": 1060 + }, + { + "epoch": 0.68, + "learning_rate": 4.2879146919431274e-07, + "logits/generated": -0.5574159622192383, + "logits/real": -0.7532224059104919, + "logps/generated": -766.7594604492188, + "logps/real": -175.73806762695312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -34.75395965576172, + "rewards/margins": 34.18846130371094, + "rewards/real": -0.565497875213623, + "step": 1070 + }, + { + "epoch": 0.69, + "learning_rate": 4.2760663507109e-07, + "logits/generated": -0.6016499400138855, + "logits/real": -0.685789942741394, + "logps/generated": -747.66748046875, + "logps/real": -175.3254852294922, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.80033874511719, + "rewards/margins": 33.334190368652344, + "rewards/real": -0.46614784002304077, + "step": 1080 + }, + { + "epoch": 0.7, + "learning_rate": 4.264218009478673e-07, + "logits/generated": -0.514714777469635, + "logits/real": -0.729649543762207, + "logps/generated": -599.8255615234375, + "logps/real": -137.29776000976562, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/generated": -21.55307960510254, + "rewards/margins": 21.553081512451172, + "rewards/real": 2.1871178432775196e-06, + "step": 1090 + }, + { + "epoch": 0.7, + "learning_rate": 4.2523696682464456e-07, + "logits/generated": -0.44834762811660767, + "logits/real": -0.662898600101471, + "logps/generated": -626.8858642578125, + "logps/real": -151.83450317382812, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -21.125072479248047, + "rewards/margins": 20.952497482299805, + "rewards/real": -0.17257389426231384, + "step": 1100 + }, + { + "epoch": 0.71, + "learning_rate": 4.240521327014218e-07, + "logits/generated": -0.5126262307167053, + "logits/real": -0.6713369488716125, + "logps/generated": -653.6041259765625, + "logps/real": -159.56890869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -23.021862030029297, + "rewards/margins": 22.88108253479004, + "rewards/real": -0.14077897369861603, + "step": 1110 + }, + { + "epoch": 0.72, + "learning_rate": 4.22867298578199e-07, + "logits/generated": -0.4650425910949707, + "logits/real": -0.763433575630188, + "logps/generated": -658.9270629882812, + "logps/real": -135.75672912597656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -24.407550811767578, + "rewards/margins": 24.228670120239258, + "rewards/real": -0.1788794994354248, + "step": 1120 + }, + { + "epoch": 0.72, + "learning_rate": 4.216824644549763e-07, + "logits/generated": -0.4908994138240814, + "logits/real": -0.6465893983840942, + "logps/generated": -632.4884033203125, + "logps/real": -145.31004333496094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -21.62966537475586, + "rewards/margins": 21.41011619567871, + "rewards/real": -0.2195475846529007, + "step": 1130 + }, + { + "epoch": 0.73, + "learning_rate": 4.2049763033175355e-07, + "logits/generated": -0.4437866806983948, + "logits/real": -0.6466466188430786, + "logps/generated": -626.27294921875, + "logps/real": -149.35035705566406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -22.251583099365234, + "rewards/margins": 22.10491371154785, + "rewards/real": -0.1466691941022873, + "step": 1140 + }, + { + "epoch": 0.74, + "learning_rate": 4.1931279620853077e-07, + "logits/generated": -0.47175711393356323, + "logits/real": -0.6160026788711548, + "logps/generated": -677.9451904296875, + "logps/real": -135.2311553955078, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -24.66263771057129, + "rewards/margins": 24.36861801147461, + "rewards/real": -0.2940204441547394, + "step": 1150 + }, + { + "epoch": 0.74, + "learning_rate": 4.1812796208530804e-07, + "logits/generated": -0.5612315535545349, + "logits/real": -0.6705020666122437, + "logps/generated": -676.8372802734375, + "logps/real": -168.85000610351562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -27.101232528686523, + "rewards/margins": 26.762081146240234, + "rewards/real": -0.3391529619693756, + "step": 1160 + }, + { + "epoch": 0.75, + "learning_rate": 4.169431279620853e-07, + "logits/generated": -0.4795566201210022, + "logits/real": -0.6562764644622803, + "logps/generated": -666.4014282226562, + "logps/real": -148.19837951660156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -25.7773494720459, + "rewards/margins": 25.312881469726562, + "rewards/real": -0.464468777179718, + "step": 1170 + }, + { + "epoch": 0.75, + "learning_rate": 4.1575829383886253e-07, + "logits/generated": -0.4656401574611664, + "logits/real": -0.6328948736190796, + "logps/generated": -658.664306640625, + "logps/real": -155.73130798339844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -26.921737670898438, + "rewards/margins": 26.695215225219727, + "rewards/real": -0.22652335464954376, + "step": 1180 + }, + { + "epoch": 0.76, + "learning_rate": 4.145734597156398e-07, + "logits/generated": -0.4437786936759949, + "logits/real": -0.6895097494125366, + "logps/generated": -638.5018310546875, + "logps/real": -140.78604125976562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -25.6170654296875, + "rewards/margins": 25.46429443359375, + "rewards/real": -0.15277239680290222, + "step": 1190 + }, + { + "epoch": 0.77, + "learning_rate": 4.1338862559241703e-07, + "logits/generated": -0.4505455493927002, + "logits/real": -0.6155862808227539, + "logps/generated": -652.1417236328125, + "logps/real": -166.3102569580078, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/generated": -25.8651065826416, + "rewards/margins": 25.632049560546875, + "rewards/real": -0.2330542355775833, + "step": 1200 + }, + { + "epoch": 0.77, + "learning_rate": 4.122037914691943e-07, + "logits/generated": -0.5029697418212891, + "logits/real": -0.6487875580787659, + "logps/generated": -634.719482421875, + "logps/real": -166.7958526611328, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -22.444910049438477, + "rewards/margins": 22.197734832763672, + "rewards/real": -0.24717314541339874, + "step": 1210 + }, + { + "epoch": 0.78, + "learning_rate": 4.110189573459715e-07, + "logits/generated": -0.4739890992641449, + "logits/real": -0.7358786463737488, + "logps/generated": -627.636474609375, + "logps/real": -128.64334106445312, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/generated": -23.257991790771484, + "rewards/margins": 23.142520904541016, + "rewards/real": -0.11547265946865082, + "step": 1220 + }, + { + "epoch": 0.79, + "learning_rate": 4.098341232227488e-07, + "logits/generated": -0.49049538373947144, + "logits/real": -0.7324908971786499, + "logps/generated": -697.52392578125, + "logps/real": -150.76388549804688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -27.034936904907227, + "rewards/margins": 26.84500503540039, + "rewards/real": -0.18993662297725677, + "step": 1230 + }, + { + "epoch": 0.79, + "learning_rate": 4.0864928909952607e-07, + "logits/generated": -0.5197226405143738, + "logits/real": -0.732746958732605, + "logps/generated": -697.392333984375, + "logps/real": -138.52279663085938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -27.976947784423828, + "rewards/margins": 27.898412704467773, + "rewards/real": -0.0785362496972084, + "step": 1240 + }, + { + "epoch": 0.8, + "learning_rate": 4.074644549763033e-07, + "logits/generated": -0.5259883403778076, + "logits/real": -0.6905041933059692, + "logps/generated": -631.3746337890625, + "logps/real": -171.46287536621094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -23.118450164794922, + "rewards/margins": 23.003276824951172, + "rewards/real": -0.11517591774463654, + "step": 1250 + }, + { + "epoch": 0.81, + "learning_rate": 4.0627962085308056e-07, + "logits/generated": -0.4914798140525818, + "logits/real": -0.6694945096969604, + "logps/generated": -665.9325561523438, + "logps/real": -152.16909790039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -26.65480613708496, + "rewards/margins": 26.53061294555664, + "rewards/real": -0.12419945001602173, + "step": 1260 + }, + { + "epoch": 0.81, + "learning_rate": 4.0509478672985783e-07, + "logits/generated": -0.435981810092926, + "logits/real": -0.6437792181968689, + "logps/generated": -639.2318115234375, + "logps/real": -139.5286102294922, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -23.267459869384766, + "rewards/margins": 23.242870330810547, + "rewards/real": -0.024588558822870255, + "step": 1270 + }, + { + "epoch": 0.82, + "learning_rate": 4.0390995260663505e-07, + "logits/generated": -0.46243348717689514, + "logits/real": -0.6304915547370911, + "logps/generated": -671.2921142578125, + "logps/real": -151.30206298828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -25.752239227294922, + "rewards/margins": 25.52545738220215, + "rewards/real": -0.2267828732728958, + "step": 1280 + }, + { + "epoch": 0.83, + "learning_rate": 4.0272511848341227e-07, + "logits/generated": -0.4131905436515808, + "logits/real": -0.6179688572883606, + "logps/generated": -685.4735107421875, + "logps/real": -166.12875366210938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -27.9171085357666, + "rewards/margins": 27.653793334960938, + "rewards/real": -0.2633177638053894, + "step": 1290 + }, + { + "epoch": 0.83, + "learning_rate": 4.0154028436018954e-07, + "logits/generated": -0.45457887649536133, + "logits/real": -0.7321020364761353, + "logps/generated": -687.7710571289062, + "logps/real": -118.0494384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -27.879419326782227, + "rewards/margins": 27.65741539001465, + "rewards/real": -0.22200465202331543, + "step": 1300 + }, + { + "epoch": 0.84, + "learning_rate": 4.003554502369668e-07, + "logits/generated": -0.4731278419494629, + "logits/real": -0.6649892926216125, + "logps/generated": -702.4985961914062, + "logps/real": -164.13600158691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -29.949352264404297, + "rewards/margins": 29.615692138671875, + "rewards/real": -0.33366328477859497, + "step": 1310 + }, + { + "epoch": 0.84, + "learning_rate": 3.991706161137441e-07, + "logits/generated": -0.5154431462287903, + "logits/real": -0.6099938750267029, + "logps/generated": -733.9407958984375, + "logps/real": -174.83822631835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -31.6252384185791, + "rewards/margins": 31.47017478942871, + "rewards/real": -0.15506593883037567, + "step": 1320 + }, + { + "epoch": 0.85, + "learning_rate": 3.979857819905213e-07, + "logits/generated": -0.47972407937049866, + "logits/real": -0.7196077108383179, + "logps/generated": -695.8734741210938, + "logps/real": -144.66249084472656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -27.702346801757812, + "rewards/margins": 27.56471824645996, + "rewards/real": -0.13762858510017395, + "step": 1330 + }, + { + "epoch": 0.86, + "learning_rate": 3.968009478672986e-07, + "logits/generated": -0.406221866607666, + "logits/real": -0.6687533259391785, + "logps/generated": -655.7437744140625, + "logps/real": -135.68966674804688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -28.703540802001953, + "rewards/margins": 28.489782333374023, + "rewards/real": -0.21375396847724915, + "step": 1340 + }, + { + "epoch": 0.86, + "learning_rate": 3.9561611374407585e-07, + "logits/generated": -0.45106711983680725, + "logits/real": -0.6919107437133789, + "logps/generated": -715.7987060546875, + "logps/real": -138.26852416992188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -32.545223236083984, + "rewards/margins": 32.39795684814453, + "rewards/real": -0.1472676545381546, + "step": 1350 + }, + { + "epoch": 0.87, + "learning_rate": 3.94431279620853e-07, + "logits/generated": -0.48046213388442993, + "logits/real": -0.5451101064682007, + "logps/generated": -697.7542724609375, + "logps/real": -152.2278594970703, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -29.348682403564453, + "rewards/margins": 29.207202911376953, + "rewards/real": -0.14147798717021942, + "step": 1360 + }, + { + "epoch": 0.88, + "learning_rate": 3.932464454976303e-07, + "logits/generated": -0.4254804253578186, + "logits/real": -0.6588962078094482, + "logps/generated": -705.3590087890625, + "logps/real": -152.607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -31.081212997436523, + "rewards/margins": 30.919677734375, + "rewards/real": -0.16153457760810852, + "step": 1370 + }, + { + "epoch": 0.88, + "learning_rate": 3.9206161137440757e-07, + "logits/generated": -0.48913320899009705, + "logits/real": -0.6368371248245239, + "logps/generated": -745.2406005859375, + "logps/real": -166.21762084960938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -33.84128952026367, + "rewards/margins": 33.531410217285156, + "rewards/real": -0.30988219380378723, + "step": 1380 + }, + { + "epoch": 0.89, + "learning_rate": 3.9087677725118484e-07, + "logits/generated": -0.4334734380245209, + "logits/real": -0.5950613021850586, + "logps/generated": -713.3173217773438, + "logps/real": -172.47543334960938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -31.074203491210938, + "rewards/margins": 30.784961700439453, + "rewards/real": -0.28924185037612915, + "step": 1390 + }, + { + "epoch": 0.9, + "learning_rate": 3.8969194312796206e-07, + "logits/generated": -0.49943074584007263, + "logits/real": -0.6621376276016235, + "logps/generated": -750.9681396484375, + "logps/real": -154.4829864501953, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -32.702816009521484, + "rewards/margins": 32.558631896972656, + "rewards/real": -0.14418402314186096, + "step": 1400 + }, + { + "epoch": 0.9, + "learning_rate": 3.8850710900473933e-07, + "logits/generated": -0.4399910867214203, + "logits/real": -0.6627537608146667, + "logps/generated": -712.4768676757812, + "logps/real": -149.22299194335938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -30.581218719482422, + "rewards/margins": 30.372350692749023, + "rewards/real": -0.2088705599308014, + "step": 1410 + }, + { + "epoch": 0.91, + "learning_rate": 3.873222748815166e-07, + "logits/generated": -0.4090496897697449, + "logits/real": -0.6347898244857788, + "logps/generated": -691.2296752929688, + "logps/real": -159.6715545654297, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -28.685245513916016, + "rewards/margins": 28.553665161132812, + "rewards/real": -0.13157956302165985, + "step": 1420 + }, + { + "epoch": 0.91, + "learning_rate": 3.8613744075829377e-07, + "logits/generated": -0.44337087869644165, + "logits/real": -0.7070174813270569, + "logps/generated": -725.4908447265625, + "logps/real": -135.2198486328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -31.43515396118164, + "rewards/margins": 31.318073272705078, + "rewards/real": -0.11708203703165054, + "step": 1430 + }, + { + "epoch": 0.92, + "learning_rate": 3.8495260663507104e-07, + "logits/generated": -0.45473846793174744, + "logits/real": -0.7236835360527039, + "logps/generated": -727.946533203125, + "logps/real": -142.0230712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.88336944580078, + "rewards/margins": 33.610408782958984, + "rewards/real": -0.27295243740081787, + "step": 1440 + }, + { + "epoch": 0.93, + "learning_rate": 3.837677725118483e-07, + "logits/generated": -0.5566205978393555, + "logits/real": -0.7040198445320129, + "logps/generated": -782.2391357421875, + "logps/real": -159.59271240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -36.38234329223633, + "rewards/margins": 36.12818908691406, + "rewards/real": -0.2541573643684387, + "step": 1450 + }, + { + "epoch": 0.93, + "learning_rate": 3.825829383886256e-07, + "logits/generated": -0.4843382239341736, + "logits/real": -0.7185007929801941, + "logps/generated": -782.1573486328125, + "logps/real": -133.78878784179688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -36.17876434326172, + "rewards/margins": 36.1375846862793, + "rewards/real": -0.04117864370346069, + "step": 1460 + }, + { + "epoch": 0.94, + "learning_rate": 3.813981042654028e-07, + "logits/generated": -0.527305006980896, + "logits/real": -0.750108540058136, + "logps/generated": -724.2720947265625, + "logps/real": -165.68260192871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -35.12436294555664, + "rewards/margins": 34.905982971191406, + "rewards/real": -0.21837835013866425, + "step": 1470 + }, + { + "epoch": 0.95, + "learning_rate": 3.802132701421801e-07, + "logits/generated": -0.4532243609428406, + "logits/real": -0.7372425198554993, + "logps/generated": -707.7957763671875, + "logps/real": -111.7388687133789, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -31.10491943359375, + "rewards/margins": 30.893756866455078, + "rewards/real": -0.21116304397583008, + "step": 1480 + }, + { + "epoch": 0.95, + "learning_rate": 3.7902843601895736e-07, + "logits/generated": -0.5261486768722534, + "logits/real": -0.7524106502532959, + "logps/generated": -750.0657958984375, + "logps/real": -141.4464111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -36.03588104248047, + "rewards/margins": 35.844032287597656, + "rewards/real": -0.19184735417366028, + "step": 1490 + }, + { + "epoch": 0.96, + "learning_rate": 3.778436018957346e-07, + "logits/generated": -0.49053382873535156, + "logits/real": -0.6678867936134338, + "logps/generated": -723.454833984375, + "logps/real": -158.5665283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -32.78173828125, + "rewards/margins": 32.50086975097656, + "rewards/real": -0.280868798494339, + "step": 1500 + }, + { + "epoch": 0.97, + "learning_rate": 3.766587677725118e-07, + "logits/generated": -0.5047518014907837, + "logits/real": -0.7079204320907593, + "logps/generated": -738.470947265625, + "logps/real": -146.69248962402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -33.998992919921875, + "rewards/margins": 33.81964111328125, + "rewards/real": -0.1793525069952011, + "step": 1510 + }, + { + "epoch": 0.97, + "learning_rate": 3.7547393364928907e-07, + "logits/generated": -0.47372421622276306, + "logits/real": -0.7369820475578308, + "logps/generated": -785.177978515625, + "logps/real": -148.13267517089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -36.820003509521484, + "rewards/margins": 36.730655670166016, + "rewards/real": -0.08935005962848663, + "step": 1520 + }, + { + "epoch": 0.98, + "learning_rate": 3.7428909952606634e-07, + "logits/generated": -0.5118182897567749, + "logits/real": -0.6915109753608704, + "logps/generated": -767.2000732421875, + "logps/real": -178.1002655029297, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -36.287994384765625, + "rewards/margins": 35.94424057006836, + "rewards/real": -0.3437514007091522, + "step": 1530 + }, + { + "epoch": 0.99, + "learning_rate": 3.7310426540284356e-07, + "logits/generated": -0.48033565282821655, + "logits/real": -0.6938971281051636, + "logps/generated": -790.1136474609375, + "logps/real": -139.7992401123047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -37.80128860473633, + "rewards/margins": 37.25532531738281, + "rewards/real": -0.5459665060043335, + "step": 1540 + }, + { + "epoch": 0.99, + "learning_rate": 3.7191943127962083e-07, + "logits/generated": -0.48637381196022034, + "logits/real": -0.6838294863700867, + "logps/generated": -765.7503662109375, + "logps/real": -158.60995483398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -35.00304412841797, + "rewards/margins": 34.73870086669922, + "rewards/real": -0.26434019207954407, + "step": 1550 + }, + { + "epoch": 1.0, + "learning_rate": 3.707345971563981e-07, + "logits/generated": -0.44851940870285034, + "logits/real": -0.6709171533584595, + "logps/generated": -771.0245971679688, + "logps/real": -168.75152587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -38.06503677368164, + "rewards/margins": 37.67142105102539, + "rewards/real": -0.3936167359352112, + "step": 1560 + }, + { + "epoch": 1.0, + "learning_rate": 3.695497630331754e-07, + "logits/generated": -0.45538201928138733, + "logits/real": -0.6043254733085632, + "logps/generated": -774.0435791015625, + "logps/real": -145.94451904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -36.95838928222656, + "rewards/margins": 36.758235931396484, + "rewards/real": -0.2001533955335617, + "step": 1570 + }, + { + "epoch": 1.01, + "learning_rate": 3.683649289099526e-07, + "logits/generated": -0.4710386395454407, + "logits/real": -0.6742110252380371, + "logps/generated": -798.041259765625, + "logps/real": -149.87484741210938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -38.644805908203125, + "rewards/margins": 38.51628875732422, + "rewards/real": -0.12851884961128235, + "step": 1580 + }, + { + "epoch": 1.02, + "learning_rate": 3.671800947867298e-07, + "logits/generated": -0.4763055741786957, + "logits/real": -0.6764456629753113, + "logps/generated": -791.0558471679688, + "logps/real": -157.50120544433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -38.532691955566406, + "rewards/margins": 38.26321029663086, + "rewards/real": -0.26947957277297974, + "step": 1590 + }, + { + "epoch": 1.02, + "learning_rate": 3.659952606635071e-07, + "logits/generated": -0.47692328691482544, + "logits/real": -0.606033980846405, + "logps/generated": -754.9681396484375, + "logps/real": -148.64236450195312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -35.7038459777832, + "rewards/margins": 35.1637077331543, + "rewards/real": -0.5401372313499451, + "step": 1600 + }, + { + "epoch": 1.03, + "learning_rate": 3.648104265402843e-07, + "logits/generated": -0.49335426092147827, + "logits/real": -0.68101966381073, + "logps/generated": -862.1978759765625, + "logps/real": -143.81234741210938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -43.46797180175781, + "rewards/margins": 43.25313186645508, + "rewards/real": -0.21484307944774628, + "step": 1610 + }, + { + "epoch": 1.04, + "learning_rate": 3.636255924170616e-07, + "logits/generated": -0.4993807375431061, + "logits/real": -0.6376734972000122, + "logps/generated": -830.0358276367188, + "logps/real": -173.48251342773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -42.75886154174805, + "rewards/margins": 42.36219787597656, + "rewards/real": -0.3966585397720337, + "step": 1620 + }, + { + "epoch": 1.04, + "learning_rate": 3.6244075829383886e-07, + "logits/generated": -0.4894142746925354, + "logits/real": -0.671286940574646, + "logps/generated": -791.9283447265625, + "logps/real": -157.88449096679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -37.980499267578125, + "rewards/margins": 37.63560104370117, + "rewards/real": -0.34489426016807556, + "step": 1630 + }, + { + "epoch": 1.05, + "learning_rate": 3.6125592417061613e-07, + "logits/generated": -0.4308968484401703, + "logits/real": -0.6662777066230774, + "logps/generated": -815.94970703125, + "logps/real": -132.9452667236328, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -41.20854187011719, + "rewards/margins": 40.93267059326172, + "rewards/real": -0.275868684053421, + "step": 1640 + }, + { + "epoch": 1.06, + "learning_rate": 3.6007109004739335e-07, + "logits/generated": -0.44717854261398315, + "logits/real": -0.6490769982337952, + "logps/generated": -752.9622192382812, + "logps/real": -155.48178100585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -36.214019775390625, + "rewards/margins": 35.96308135986328, + "rewards/real": -0.25093746185302734, + "step": 1650 + }, + { + "epoch": 1.06, + "learning_rate": 3.588862559241706e-07, + "logits/generated": -0.47657886147499084, + "logits/real": -0.6330237984657288, + "logps/generated": -787.4442749023438, + "logps/real": -159.59701538085938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -36.82368850708008, + "rewards/margins": 36.52547073364258, + "rewards/real": -0.2982181906700134, + "step": 1660 + }, + { + "epoch": 1.07, + "learning_rate": 3.5770142180094784e-07, + "logits/generated": -0.4685605466365814, + "logits/real": -0.6297181844711304, + "logps/generated": -827.0250244140625, + "logps/real": -150.63694763183594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -41.97758483886719, + "rewards/margins": 41.71283721923828, + "rewards/real": -0.26475200057029724, + "step": 1670 + }, + { + "epoch": 1.07, + "learning_rate": 3.5651658767772506e-07, + "logits/generated": -0.45342230796813965, + "logits/real": -0.6486082673072815, + "logps/generated": -749.4054565429688, + "logps/real": -126.46002197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -35.42198944091797, + "rewards/margins": 35.160091400146484, + "rewards/real": -0.26189571619033813, + "step": 1680 + }, + { + "epoch": 1.08, + "learning_rate": 3.5533175355450234e-07, + "logits/generated": -0.4470803141593933, + "logits/real": -0.5735016465187073, + "logps/generated": -766.5286865234375, + "logps/real": -174.92886352539062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -39.0324592590332, + "rewards/margins": 38.663352966308594, + "rewards/real": -0.3691008687019348, + "step": 1690 + }, + { + "epoch": 1.09, + "learning_rate": 3.541469194312796e-07, + "logits/generated": -0.47693657875061035, + "logits/real": -0.7134417295455933, + "logps/generated": -820.4762573242188, + "logps/real": -122.05989837646484, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -43.139793395996094, + "rewards/margins": 42.87862014770508, + "rewards/real": -0.26117831468582153, + "step": 1700 + }, + { + "epoch": 1.09, + "learning_rate": 3.529620853080569e-07, + "logits/generated": -0.45882320404052734, + "logits/real": -0.664508044719696, + "logps/generated": -831.1781005859375, + "logps/real": -147.54002380371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -41.169921875, + "rewards/margins": 40.772850036621094, + "rewards/real": -0.39707642793655396, + "step": 1710 + }, + { + "epoch": 1.1, + "learning_rate": 3.517772511848341e-07, + "logits/generated": -0.4301510453224182, + "logits/real": -0.6589769124984741, + "logps/generated": -819.22119140625, + "logps/real": -183.15982055664062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -41.374855041503906, + "rewards/margins": 41.15515899658203, + "rewards/real": -0.2196962833404541, + "step": 1720 + }, + { + "epoch": 1.11, + "learning_rate": 3.505924170616114e-07, + "logits/generated": -0.4489319920539856, + "logits/real": -0.6075456738471985, + "logps/generated": -850.15673828125, + "logps/real": -150.83221435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -44.46028137207031, + "rewards/margins": 43.98347473144531, + "rewards/real": -0.47680991888046265, + "step": 1730 + }, + { + "epoch": 1.11, + "learning_rate": 3.4940758293838865e-07, + "logits/generated": -0.4293234944343567, + "logits/real": -0.7189976572990417, + "logps/generated": -835.0768432617188, + "logps/real": -131.94656372070312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -42.448909759521484, + "rewards/margins": 42.11590576171875, + "rewards/real": -0.3330024182796478, + "step": 1740 + }, + { + "epoch": 1.12, + "learning_rate": 3.482227488151658e-07, + "logits/generated": -0.41782283782958984, + "logits/real": -0.5898563265800476, + "logps/generated": -835.0525512695312, + "logps/real": -165.47747802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -43.286441802978516, + "rewards/margins": 42.87544631958008, + "rewards/real": -0.4109969735145569, + "step": 1750 + }, + { + "epoch": 1.13, + "learning_rate": 3.470379146919431e-07, + "logits/generated": -0.4614785313606262, + "logits/real": -0.5493655204772949, + "logps/generated": -817.9602661132812, + "logps/real": -168.73268127441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -40.68376159667969, + "rewards/margins": 40.22270202636719, + "rewards/real": -0.4610599875450134, + "step": 1760 + }, + { + "epoch": 1.13, + "learning_rate": 3.4585308056872036e-07, + "logits/generated": -0.4272429347038269, + "logits/real": -0.5022194981575012, + "logps/generated": -787.2926635742188, + "logps/real": -167.05831909179688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -38.47704315185547, + "rewards/margins": 38.09776306152344, + "rewards/real": -0.37928327918052673, + "step": 1770 + }, + { + "epoch": 1.14, + "learning_rate": 3.4466824644549763e-07, + "logits/generated": -0.4905944764614105, + "logits/real": -0.6622756719589233, + "logps/generated": -805.285400390625, + "logps/real": -178.21536254882812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -41.157222747802734, + "rewards/margins": 40.712562561035156, + "rewards/real": -0.4446594715118408, + "step": 1780 + }, + { + "epoch": 1.15, + "learning_rate": 3.4348341232227485e-07, + "logits/generated": -0.4158329367637634, + "logits/real": -0.6186385750770569, + "logps/generated": -749.114501953125, + "logps/real": -156.46290588378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -36.60091781616211, + "rewards/margins": 36.17322540283203, + "rewards/real": -0.4276936650276184, + "step": 1790 + }, + { + "epoch": 1.15, + "learning_rate": 3.422985781990521e-07, + "logits/generated": -0.4725651144981384, + "logits/real": -0.652617335319519, + "logps/generated": -847.3025512695312, + "logps/real": -150.90065002441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -43.543460845947266, + "rewards/margins": 42.95781326293945, + "rewards/real": -0.5856472253799438, + "step": 1800 + }, + { + "epoch": 1.16, + "learning_rate": 3.411137440758294e-07, + "logits/generated": -0.4408513605594635, + "logits/real": -0.6833234429359436, + "logps/generated": -768.7931518554688, + "logps/real": -134.6144256591797, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -37.13344955444336, + "rewards/margins": 36.78771209716797, + "rewards/real": -0.3457415997982025, + "step": 1810 + }, + { + "epoch": 1.16, + "learning_rate": 3.3992890995260667e-07, + "logits/generated": -0.4581897258758545, + "logits/real": -0.595461368560791, + "logps/generated": -827.2222900390625, + "logps/real": -174.0118408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -42.74081039428711, + "rewards/margins": 42.331790924072266, + "rewards/real": -0.40901678800582886, + "step": 1820 + }, + { + "epoch": 1.17, + "learning_rate": 3.3874407582938384e-07, + "logits/generated": -0.45837849378585815, + "logits/real": -0.6876403093338013, + "logps/generated": -833.8259887695312, + "logps/real": -141.63064575195312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -41.820953369140625, + "rewards/margins": 41.39267349243164, + "rewards/real": -0.42827802896499634, + "step": 1830 + }, + { + "epoch": 1.18, + "learning_rate": 3.375592417061611e-07, + "logits/generated": -0.48998793959617615, + "logits/real": -0.6868919730186462, + "logps/generated": -858.2042846679688, + "logps/real": -145.3782196044922, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -44.131046295166016, + "rewards/margins": 43.59005355834961, + "rewards/real": -0.5409911870956421, + "step": 1840 + }, + { + "epoch": 1.18, + "learning_rate": 3.363744075829384e-07, + "logits/generated": -0.4100268483161926, + "logits/real": -0.694664478302002, + "logps/generated": -809.3135375976562, + "logps/real": -166.88694763183594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -41.03974151611328, + "rewards/margins": 40.51675033569336, + "rewards/real": -0.5229931473731995, + "step": 1850 + }, + { + "epoch": 1.19, + "learning_rate": 3.351895734597156e-07, + "logits/generated": -0.49493294954299927, + "logits/real": -0.6615623235702515, + "logps/generated": -918.6854248046875, + "logps/real": -144.638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.01395797729492, + "rewards/margins": 51.5162467956543, + "rewards/real": -0.4977096915245056, + "step": 1860 + }, + { + "epoch": 1.2, + "learning_rate": 3.340047393364929e-07, + "logits/generated": -0.3965403735637665, + "logits/real": -0.6068152189254761, + "logps/generated": -797.3876342773438, + "logps/real": -138.4888458251953, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -39.271629333496094, + "rewards/margins": 38.95609664916992, + "rewards/real": -0.3155314326286316, + "step": 1870 + }, + { + "epoch": 1.2, + "learning_rate": 3.3281990521327015e-07, + "logits/generated": -0.43841552734375, + "logits/real": -0.5667222738265991, + "logps/generated": -804.7280883789062, + "logps/real": -173.30206298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -39.95454788208008, + "rewards/margins": 39.456111907958984, + "rewards/real": -0.49843597412109375, + "step": 1880 + }, + { + "epoch": 1.21, + "learning_rate": 3.316350710900474e-07, + "logits/generated": -0.38486871123313904, + "logits/real": -0.5955111384391785, + "logps/generated": -779.2044677734375, + "logps/real": -138.0015869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -38.943260192871094, + "rewards/margins": 38.430908203125, + "rewards/real": -0.5123514533042908, + "step": 1890 + }, + { + "epoch": 1.22, + "learning_rate": 3.304502369668246e-07, + "logits/generated": -0.4344411790370941, + "logits/real": -0.662503182888031, + "logps/generated": -789.8323974609375, + "logps/real": -138.18624877929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -39.464324951171875, + "rewards/margins": 39.04804229736328, + "rewards/real": -0.41627994179725647, + "step": 1900 + }, + { + "epoch": 1.22, + "learning_rate": 3.2926540284360186e-07, + "logits/generated": -0.40550222992897034, + "logits/real": -0.6265038251876831, + "logps/generated": -832.6241455078125, + "logps/real": -186.16464233398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -42.117034912109375, + "rewards/margins": 41.634342193603516, + "rewards/real": -0.48269376158714294, + "step": 1910 + }, + { + "epoch": 1.23, + "learning_rate": 3.2808056872037913e-07, + "logits/generated": -0.4908333718776703, + "logits/real": -0.6979160904884338, + "logps/generated": -800.664794921875, + "logps/real": -144.0208740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -39.349517822265625, + "rewards/margins": 38.85806655883789, + "rewards/real": -0.491449773311615, + "step": 1920 + }, + { + "epoch": 1.23, + "learning_rate": 3.2689573459715635e-07, + "logits/generated": -0.4737107753753662, + "logits/real": -0.6124163866043091, + "logps/generated": -789.6759643554688, + "logps/real": -173.1675262451172, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -41.43712615966797, + "rewards/margins": 40.75090789794922, + "rewards/real": -0.6862186193466187, + "step": 1930 + }, + { + "epoch": 1.24, + "learning_rate": 3.2571090047393363e-07, + "logits/generated": -0.4662472605705261, + "logits/real": -0.7021108865737915, + "logps/generated": -912.7789916992188, + "logps/real": -144.39654541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -50.74687576293945, + "rewards/margins": 50.47743606567383, + "rewards/real": -0.26943859457969666, + "step": 1940 + }, + { + "epoch": 1.25, + "learning_rate": 3.245260663507109e-07, + "logits/generated": -0.5851739645004272, + "logits/real": -0.708136260509491, + "logps/generated": -838.8018798828125, + "logps/real": -151.24722290039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -44.7834358215332, + "rewards/margins": 44.43694305419922, + "rewards/real": -0.3464917838573456, + "step": 1950 + }, + { + "epoch": 1.25, + "learning_rate": 3.2334123222748817e-07, + "logits/generated": -0.5089236497879028, + "logits/real": -0.6847448945045471, + "logps/generated": -894.1632690429688, + "logps/real": -151.0913543701172, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -48.116729736328125, + "rewards/margins": 47.81326675415039, + "rewards/real": -0.3034594655036926, + "step": 1960 + }, + { + "epoch": 1.26, + "learning_rate": 3.221563981042654e-07, + "logits/generated": -0.4521718919277191, + "logits/real": -0.609528660774231, + "logps/generated": -829.3768310546875, + "logps/real": -167.9632568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -43.617408752441406, + "rewards/margins": 43.08763885498047, + "rewards/real": -0.5297662019729614, + "step": 1970 + }, + { + "epoch": 1.27, + "learning_rate": 3.209715639810426e-07, + "logits/generated": -0.5104943513870239, + "logits/real": -0.6578128933906555, + "logps/generated": -897.2025146484375, + "logps/real": -146.1173858642578, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -50.29200744628906, + "rewards/margins": 49.76136016845703, + "rewards/real": -0.5306479930877686, + "step": 1980 + }, + { + "epoch": 1.27, + "learning_rate": 3.197867298578199e-07, + "logits/generated": -0.453556627035141, + "logits/real": -0.6318106651306152, + "logps/generated": -901.5166015625, + "logps/real": -137.2063751220703, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -49.143409729003906, + "rewards/margins": 48.74793243408203, + "rewards/real": -0.3954845070838928, + "step": 1990 + }, + { + "epoch": 1.28, + "learning_rate": 3.186018957345971e-07, + "logits/generated": -0.530498743057251, + "logits/real": -0.6793403029441833, + "logps/generated": -878.6456909179688, + "logps/real": -138.333251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -47.53958511352539, + "rewards/margins": 47.25537872314453, + "rewards/real": -0.2842068076133728, + "step": 2000 + }, + { + "epoch": 1.29, + "learning_rate": 3.174170616113744e-07, + "logits/generated": -0.5185251832008362, + "logits/real": -0.6888160109519958, + "logps/generated": -882.78076171875, + "logps/real": -149.84170532226562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -46.70854187011719, + "rewards/margins": 46.299781799316406, + "rewards/real": -0.40876227617263794, + "step": 2010 + }, + { + "epoch": 1.29, + "learning_rate": 3.1623222748815165e-07, + "logits/generated": -0.5407160520553589, + "logits/real": -0.6995197534561157, + "logps/generated": -925.1043701171875, + "logps/real": -172.48684692382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.484397888183594, + "rewards/margins": 52.23331832885742, + "rewards/real": -0.2510821521282196, + "step": 2020 + }, + { + "epoch": 1.3, + "learning_rate": 3.150473933649289e-07, + "logits/generated": -0.47896209359169006, + "logits/real": -0.648679792881012, + "logps/generated": -855.0648193359375, + "logps/real": -140.78317260742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -44.74834442138672, + "rewards/margins": 44.42583084106445, + "rewards/real": -0.3225128650665283, + "step": 2030 + }, + { + "epoch": 1.31, + "learning_rate": 3.1386255924170614e-07, + "logits/generated": -0.48660707473754883, + "logits/real": -0.6823971271514893, + "logps/generated": -855.0968627929688, + "logps/real": -149.41519165039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -45.171939849853516, + "rewards/margins": 44.90800857543945, + "rewards/real": -0.26393207907676697, + "step": 2040 + }, + { + "epoch": 1.31, + "learning_rate": 3.126777251184834e-07, + "logits/generated": -0.4488789141178131, + "logits/real": -0.6946064233779907, + "logps/generated": -834.7445068359375, + "logps/real": -149.4184112548828, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -44.987037658691406, + "rewards/margins": 44.69217300415039, + "rewards/real": -0.29486605525016785, + "step": 2050 + }, + { + "epoch": 1.32, + "learning_rate": 3.1149289099526064e-07, + "logits/generated": -0.46628251671791077, + "logits/real": -0.6745079159736633, + "logps/generated": -957.2546997070312, + "logps/real": -154.1484832763672, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.21331024169922, + "rewards/margins": 52.879371643066406, + "rewards/real": -0.33394068479537964, + "step": 2060 + }, + { + "epoch": 1.32, + "learning_rate": 3.103080568720379e-07, + "logits/generated": -0.49304255843162537, + "logits/real": -0.6859273314476013, + "logps/generated": -900.3800659179688, + "logps/real": -166.83229064941406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -48.62682342529297, + "rewards/margins": 48.23986053466797, + "rewards/real": -0.3869660794734955, + "step": 2070 + }, + { + "epoch": 1.33, + "learning_rate": 3.0912322274881513e-07, + "logits/generated": -0.532124400138855, + "logits/real": -0.6791267991065979, + "logps/generated": -867.7174072265625, + "logps/real": -173.84207153320312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -45.860843658447266, + "rewards/margins": 45.43999481201172, + "rewards/real": -0.4208555817604065, + "step": 2080 + }, + { + "epoch": 1.34, + "learning_rate": 3.079383886255924e-07, + "logits/generated": -0.5052396655082703, + "logits/real": -0.6648889780044556, + "logps/generated": -909.2999267578125, + "logps/real": -178.1378631591797, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -47.73103332519531, + "rewards/margins": 47.32225799560547, + "rewards/real": -0.40877556800842285, + "step": 2090 + }, + { + "epoch": 1.34, + "learning_rate": 3.067535545023697e-07, + "logits/generated": -0.5442999601364136, + "logits/real": -0.7468653917312622, + "logps/generated": -960.0681762695312, + "logps/real": -151.3079071044922, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -56.67901611328125, + "rewards/margins": 56.42626953125, + "rewards/real": -0.25274744629859924, + "step": 2100 + }, + { + "epoch": 1.35, + "learning_rate": 3.055687203791469e-07, + "logits/generated": -0.49901169538497925, + "logits/real": -0.6442614793777466, + "logps/generated": -879.4744873046875, + "logps/real": -180.48049926757812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -47.23583221435547, + "rewards/margins": 46.76594161987305, + "rewards/real": -0.4698910713195801, + "step": 2110 + }, + { + "epoch": 1.36, + "learning_rate": 3.0438388625592417e-07, + "logits/generated": -0.4474611282348633, + "logits/real": -0.6742789149284363, + "logps/generated": -857.6492309570312, + "logps/real": -136.08782958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -43.95075988769531, + "rewards/margins": 43.52191925048828, + "rewards/real": -0.4288388788700104, + "step": 2120 + }, + { + "epoch": 1.36, + "learning_rate": 3.0319905213270144e-07, + "logits/generated": -0.42652368545532227, + "logits/real": -0.6252545118331909, + "logps/generated": -812.95361328125, + "logps/real": -163.1314239501953, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -42.40178680419922, + "rewards/margins": 42.024620056152344, + "rewards/real": -0.37716203927993774, + "step": 2130 + }, + { + "epoch": 1.37, + "learning_rate": 3.0201421800947866e-07, + "logits/generated": -0.41971296072006226, + "logits/real": -0.6229659914970398, + "logps/generated": -934.0114135742188, + "logps/real": -141.24195861816406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -51.96159744262695, + "rewards/margins": 51.59962844848633, + "rewards/real": -0.3619686961174011, + "step": 2140 + }, + { + "epoch": 1.38, + "learning_rate": 3.008293838862559e-07, + "logits/generated": -0.5068638324737549, + "logits/real": -0.7758525609970093, + "logps/generated": -907.1189575195312, + "logps/real": -138.0588836669922, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -47.612152099609375, + "rewards/margins": 47.226993560791016, + "rewards/real": -0.38516414165496826, + "step": 2150 + }, + { + "epoch": 1.38, + "learning_rate": 2.9964454976303315e-07, + "logits/generated": -0.4722062945365906, + "logits/real": -0.6450417041778564, + "logps/generated": -845.0633544921875, + "logps/real": -173.4315185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -44.63590621948242, + "rewards/margins": 44.2160530090332, + "rewards/real": -0.41985201835632324, + "step": 2160 + }, + { + "epoch": 1.39, + "learning_rate": 2.984597156398104e-07, + "logits/generated": -0.49884462356567383, + "logits/real": -0.6271129846572876, + "logps/generated": -924.2891845703125, + "logps/real": -160.28036499023438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.93656539916992, + "rewards/margins": 52.53998947143555, + "rewards/real": -0.3965730667114258, + "step": 2170 + }, + { + "epoch": 1.39, + "learning_rate": 2.9727488151658765e-07, + "logits/generated": -0.48375964164733887, + "logits/real": -0.7125850915908813, + "logps/generated": -971.89404296875, + "logps/real": -155.76739501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.2780876159668, + "rewards/margins": 56.74763870239258, + "rewards/real": -0.5304462909698486, + "step": 2180 + }, + { + "epoch": 1.4, + "learning_rate": 2.960900473933649e-07, + "logits/generated": -0.5001789927482605, + "logits/real": -0.6946722269058228, + "logps/generated": -875.4529418945312, + "logps/real": -136.772216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -45.757354736328125, + "rewards/margins": 45.39958953857422, + "rewards/real": -0.3577651083469391, + "step": 2190 + }, + { + "epoch": 1.41, + "learning_rate": 2.949052132701422e-07, + "logits/generated": -0.481309711933136, + "logits/real": -0.6437762975692749, + "logps/generated": -897.0853271484375, + "logps/real": -148.08895874023438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -48.52891540527344, + "rewards/margins": 48.114437103271484, + "rewards/real": -0.4144725203514099, + "step": 2200 + }, + { + "epoch": 1.41, + "learning_rate": 2.9372037914691946e-07, + "logits/generated": -0.505331814289093, + "logits/real": -0.7198413014411926, + "logps/generated": -943.2364501953125, + "logps/real": -126.28971099853516, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.058265686035156, + "rewards/margins": 51.653160095214844, + "rewards/real": -0.4051007330417633, + "step": 2210 + }, + { + "epoch": 1.42, + "learning_rate": 2.9253554502369663e-07, + "logits/generated": -0.4623163342475891, + "logits/real": -0.6992497444152832, + "logps/generated": -924.9318237304688, + "logps/real": -165.45114135742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.68879318237305, + "rewards/margins": 52.19450759887695, + "rewards/real": -0.4942806363105774, + "step": 2220 + }, + { + "epoch": 1.43, + "learning_rate": 2.913507109004739e-07, + "logits/generated": -0.4906153082847595, + "logits/real": -0.6900730729103088, + "logps/generated": -922.6204223632812, + "logps/real": -129.15237426757812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -51.038394927978516, + "rewards/margins": 50.75798797607422, + "rewards/real": -0.2804059386253357, + "step": 2230 + }, + { + "epoch": 1.43, + "learning_rate": 2.901658767772512e-07, + "logits/generated": -0.4813242554664612, + "logits/real": -0.7062429785728455, + "logps/generated": -821.6672973632812, + "logps/real": -163.94393920898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -43.79729461669922, + "rewards/margins": 43.21052932739258, + "rewards/real": -0.5867670178413391, + "step": 2240 + }, + { + "epoch": 1.44, + "learning_rate": 2.889810426540284e-07, + "logits/generated": -0.43650323152542114, + "logits/real": -0.723192572593689, + "logps/generated": -875.97412109375, + "logps/real": -134.3582000732422, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -48.2892951965332, + "rewards/margins": 47.99460220336914, + "rewards/real": -0.2946951985359192, + "step": 2250 + }, + { + "epoch": 1.45, + "learning_rate": 2.8779620853080567e-07, + "logits/generated": -0.4641779065132141, + "logits/real": -0.6070187091827393, + "logps/generated": -904.2113037109375, + "logps/real": -158.0858612060547, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -51.96845245361328, + "rewards/margins": 51.64441680908203, + "rewards/real": -0.3240307569503784, + "step": 2260 + }, + { + "epoch": 1.45, + "learning_rate": 2.8661137440758294e-07, + "logits/generated": -0.543385922908783, + "logits/real": -0.6664692163467407, + "logps/generated": -886.740234375, + "logps/real": -158.68768310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -48.016876220703125, + "rewards/margins": 47.88544464111328, + "rewards/real": -0.13143035769462585, + "step": 2270 + }, + { + "epoch": 1.46, + "learning_rate": 2.854265402843602e-07, + "logits/generated": -0.5325735807418823, + "logits/real": -0.6880441904067993, + "logps/generated": -889.3059692382812, + "logps/real": -143.56271362304688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -48.709747314453125, + "rewards/margins": 48.41456604003906, + "rewards/real": -0.29517900943756104, + "step": 2280 + }, + { + "epoch": 1.47, + "learning_rate": 2.842417061611374e-07, + "logits/generated": -0.5220402479171753, + "logits/real": -0.7008036375045776, + "logps/generated": -901.8049926757812, + "logps/real": -151.60629272460938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -47.60210037231445, + "rewards/margins": 47.190391540527344, + "rewards/real": -0.4117053151130676, + "step": 2290 + }, + { + "epoch": 1.47, + "learning_rate": 2.8305687203791465e-07, + "logits/generated": -0.5935906171798706, + "logits/real": -0.8247605562210083, + "logps/generated": -907.3389892578125, + "logps/real": -138.0072479248047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -49.43927001953125, + "rewards/margins": 49.20110321044922, + "rewards/real": -0.23817138373851776, + "step": 2300 + }, + { + "epoch": 1.48, + "learning_rate": 2.8187203791469193e-07, + "logits/generated": -0.5039738416671753, + "logits/real": -0.7532294988632202, + "logps/generated": -940.0494995117188, + "logps/real": -136.12376403808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -55.96204376220703, + "rewards/margins": 55.6636962890625, + "rewards/real": -0.2983424961566925, + "step": 2310 + }, + { + "epoch": 1.48, + "learning_rate": 2.806872037914692e-07, + "logits/generated": -0.5363454818725586, + "logits/real": -0.6708102822303772, + "logps/generated": -924.869140625, + "logps/real": -158.69607543945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -50.98878860473633, + "rewards/margins": 50.51224899291992, + "rewards/real": -0.4765354096889496, + "step": 2320 + }, + { + "epoch": 1.49, + "learning_rate": 2.795023696682464e-07, + "logits/generated": -0.5462719202041626, + "logits/real": -0.6856478452682495, + "logps/generated": -908.8020629882812, + "logps/real": -161.13491821289062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -50.132118225097656, + "rewards/margins": 49.841583251953125, + "rewards/real": -0.2905333936214447, + "step": 2330 + }, + { + "epoch": 1.5, + "learning_rate": 2.783175355450237e-07, + "logits/generated": -0.5182799696922302, + "logits/real": -0.7454923391342163, + "logps/generated": -868.3385009765625, + "logps/real": -131.3604736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -47.998130798339844, + "rewards/margins": 47.67443084716797, + "rewards/real": -0.32370421290397644, + "step": 2340 + }, + { + "epoch": 1.5, + "learning_rate": 2.7713270142180097e-07, + "logits/generated": -0.46355119347572327, + "logits/real": -0.6795281171798706, + "logps/generated": -893.9700927734375, + "logps/real": -150.07882690429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -49.319942474365234, + "rewards/margins": 48.991004943847656, + "rewards/real": -0.3289386034011841, + "step": 2350 + }, + { + "epoch": 1.51, + "learning_rate": 2.759478672985782e-07, + "logits/generated": -0.477322518825531, + "logits/real": -0.6399216055870056, + "logps/generated": -928.3748779296875, + "logps/real": -148.43809509277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.18648147583008, + "rewards/margins": 53.1119499206543, + "rewards/real": -0.07453130185604095, + "step": 2360 + }, + { + "epoch": 1.52, + "learning_rate": 2.747630331753554e-07, + "logits/generated": -0.47234511375427246, + "logits/real": -0.5880690217018127, + "logps/generated": -861.2603759765625, + "logps/real": -168.40231323242188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -47.96990203857422, + "rewards/margins": 47.553489685058594, + "rewards/real": -0.4164124131202698, + "step": 2370 + }, + { + "epoch": 1.52, + "learning_rate": 2.735781990521327e-07, + "logits/generated": -0.4614683985710144, + "logits/real": -0.5570347309112549, + "logps/generated": -906.6154174804688, + "logps/real": -196.1412811279297, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -49.976844787597656, + "rewards/margins": 49.53584671020508, + "rewards/real": -0.44100189208984375, + "step": 2380 + }, + { + "epoch": 1.53, + "learning_rate": 2.7239336492890995e-07, + "logits/generated": -0.49024948477745056, + "logits/real": -0.6667122840881348, + "logps/generated": -1002.1686401367188, + "logps/real": -143.6676788330078, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.17181396484375, + "rewards/margins": 58.65046310424805, + "rewards/real": -0.521342933177948, + "step": 2390 + }, + { + "epoch": 1.54, + "learning_rate": 2.7120853080568717e-07, + "logits/generated": -0.4796825051307678, + "logits/real": -0.701050877571106, + "logps/generated": -962.9171752929688, + "logps/real": -143.58999633789062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.36942672729492, + "rewards/margins": 54.066436767578125, + "rewards/real": -0.30298811197280884, + "step": 2400 + }, + { + "epoch": 1.54, + "learning_rate": 2.7002369668246444e-07, + "logits/generated": -0.49393147230148315, + "logits/real": -0.6975389719009399, + "logps/generated": -1020.3138427734375, + "logps/real": -122.83097839355469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.38806915283203, + "rewards/margins": 60.000091552734375, + "rewards/real": -0.3879725933074951, + "step": 2410 + }, + { + "epoch": 1.55, + "learning_rate": 2.688388625592417e-07, + "logits/generated": -0.4902682900428772, + "logits/real": -0.6860643625259399, + "logps/generated": -1019.5838012695312, + "logps/real": -162.5839080810547, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -61.252159118652344, + "rewards/margins": 60.7586669921875, + "rewards/real": -0.49349674582481384, + "step": 2420 + }, + { + "epoch": 1.55, + "learning_rate": 2.6765402843601894e-07, + "logits/generated": -0.45780739188194275, + "logits/real": -0.6357568502426147, + "logps/generated": -1011.2120971679688, + "logps/real": -139.41915893554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.23823165893555, + "rewards/margins": 58.670433044433594, + "rewards/real": -0.567794919013977, + "step": 2430 + }, + { + "epoch": 1.56, + "learning_rate": 2.664691943127962e-07, + "logits/generated": -0.4731730818748474, + "logits/real": -0.7192245721817017, + "logps/generated": -993.3472900390625, + "logps/real": -147.27291870117188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.850547790527344, + "rewards/margins": 57.4837532043457, + "rewards/real": -0.3668076992034912, + "step": 2440 + }, + { + "epoch": 1.57, + "learning_rate": 2.6528436018957343e-07, + "logits/generated": -0.44004377722740173, + "logits/real": -0.6283164620399475, + "logps/generated": -987.6051025390625, + "logps/real": -150.0260009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.029502868652344, + "rewards/margins": 58.68867111206055, + "rewards/real": -0.34083858132362366, + "step": 2450 + }, + { + "epoch": 1.57, + "learning_rate": 2.640995260663507e-07, + "logits/generated": -0.49537092447280884, + "logits/real": -0.7183640003204346, + "logps/generated": -1005.8214721679688, + "logps/real": -139.62448120117188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.904502868652344, + "rewards/margins": 57.10654830932617, + "rewards/real": -0.7979534864425659, + "step": 2460 + }, + { + "epoch": 1.58, + "learning_rate": 2.629146919431279e-07, + "logits/generated": -0.4345122277736664, + "logits/real": -0.6478680968284607, + "logps/generated": -1020.6052856445312, + "logps/real": -167.0375213623047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.1839599609375, + "rewards/margins": 59.437049865722656, + "rewards/real": -0.7469125986099243, + "step": 2470 + }, + { + "epoch": 1.59, + "learning_rate": 2.617298578199052e-07, + "logits/generated": -0.5310551524162292, + "logits/real": -0.6770363450050354, + "logps/generated": -997.2672119140625, + "logps/real": -142.95449829101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.05806350708008, + "rewards/margins": 59.3278923034668, + "rewards/real": -0.7301737666130066, + "step": 2480 + }, + { + "epoch": 1.59, + "learning_rate": 2.6054502369668247e-07, + "logits/generated": -0.49158763885498047, + "logits/real": -0.7231532335281372, + "logps/generated": -1001.833984375, + "logps/real": -135.50466918945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.535316467285156, + "rewards/margins": 60.20942306518555, + "rewards/real": -0.3258832097053528, + "step": 2490 + }, + { + "epoch": 1.6, + "learning_rate": 2.5936018957345974e-07, + "logits/generated": -0.4792296886444092, + "logits/real": -0.7686340808868408, + "logps/generated": -935.7185668945312, + "logps/real": -144.28750610351562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -50.79018020629883, + "rewards/margins": 50.41889190673828, + "rewards/real": -0.37129008769989014, + "step": 2500 + }, + { + "epoch": 1.61, + "learning_rate": 2.5817535545023696e-07, + "logits/generated": -0.45573297142982483, + "logits/real": -0.6030322909355164, + "logps/generated": -961.43896484375, + "logps/real": -175.4803466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.09404754638672, + "rewards/margins": 57.536399841308594, + "rewards/real": -0.557651698589325, + "step": 2510 + }, + { + "epoch": 1.61, + "learning_rate": 2.5699052132701423e-07, + "logits/generated": -0.4555717408657074, + "logits/real": -0.6126461625099182, + "logps/generated": -971.4552612304688, + "logps/real": -152.1224822998047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.64960479736328, + "rewards/margins": 56.96089553833008, + "rewards/real": -0.6887052655220032, + "step": 2520 + }, + { + "epoch": 1.62, + "learning_rate": 2.5580568720379145e-07, + "logits/generated": -0.37829676270484924, + "logits/real": -0.6520699262619019, + "logps/generated": -1022.7066650390625, + "logps/real": -145.3723602294922, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -64.4852294921875, + "rewards/margins": 64.01722717285156, + "rewards/real": -0.46799802780151367, + "step": 2530 + }, + { + "epoch": 1.63, + "learning_rate": 2.5462085308056867e-07, + "logits/generated": -0.4396567940711975, + "logits/real": -0.6784273982048035, + "logps/generated": -990.3728637695312, + "logps/real": -128.4115447998047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.081809997558594, + "rewards/margins": 57.558265686035156, + "rewards/real": -0.523552417755127, + "step": 2540 + }, + { + "epoch": 1.63, + "learning_rate": 2.5343601895734595e-07, + "logits/generated": -0.45570698380470276, + "logits/real": -0.6596937775611877, + "logps/generated": -939.7443237304688, + "logps/real": -153.6055145263672, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.053611755371094, + "rewards/margins": 52.287078857421875, + "rewards/real": -0.7665325403213501, + "step": 2550 + }, + { + "epoch": 1.64, + "learning_rate": 2.522511848341232e-07, + "logits/generated": -0.46676602959632874, + "logits/real": -0.6501291394233704, + "logps/generated": -935.5755615234375, + "logps/real": -158.39944458007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.2735481262207, + "rewards/margins": 52.781654357910156, + "rewards/real": -0.491886705160141, + "step": 2560 + }, + { + "epoch": 1.64, + "learning_rate": 2.510663507109005e-07, + "logits/generated": -0.40136367082595825, + "logits/real": -0.6050557494163513, + "logps/generated": -951.0081176757812, + "logps/real": -167.49505615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -54.12383270263672, + "rewards/margins": 53.68292236328125, + "rewards/real": -0.4409194886684418, + "step": 2570 + }, + { + "epoch": 1.65, + "learning_rate": 2.498815165876777e-07, + "logits/generated": -0.42623743414878845, + "logits/real": -0.5959832668304443, + "logps/generated": -1012.42724609375, + "logps/real": -158.5472412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.254615783691406, + "rewards/margins": 58.894569396972656, + "rewards/real": -0.3600441813468933, + "step": 2580 + }, + { + "epoch": 1.66, + "learning_rate": 2.48696682464455e-07, + "logits/generated": -0.43621063232421875, + "logits/real": -0.6673040390014648, + "logps/generated": -950.0224609375, + "logps/real": -131.24407958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -55.44426727294922, + "rewards/margins": 54.97258758544922, + "rewards/real": -0.47168129682540894, + "step": 2590 + }, + { + "epoch": 1.66, + "learning_rate": 2.475118483412322e-07, + "logits/generated": -0.4405759871006012, + "logits/real": -0.6971568465232849, + "logps/generated": -993.9846801757812, + "logps/real": -135.9408416748047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.888824462890625, + "rewards/margins": 57.3767204284668, + "rewards/real": -0.5121084451675415, + "step": 2600 + }, + { + "epoch": 1.67, + "learning_rate": 2.463270142180095e-07, + "logits/generated": -0.43571940064430237, + "logits/real": -0.5744475722312927, + "logps/generated": -1015.73095703125, + "logps/real": -158.0771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -61.4149055480957, + "rewards/margins": 60.82659149169922, + "rewards/real": -0.5883184671401978, + "step": 2610 + }, + { + "epoch": 1.68, + "learning_rate": 2.451421800947867e-07, + "logits/generated": -0.4443763196468353, + "logits/real": -0.6300617456436157, + "logps/generated": -948.5537109375, + "logps/real": -138.87777709960938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -53.80131149291992, + "rewards/margins": 53.32474899291992, + "rewards/real": -0.4765622019767761, + "step": 2620 + }, + { + "epoch": 1.68, + "learning_rate": 2.4395734597156397e-07, + "logits/generated": -0.4608997702598572, + "logits/real": -0.64490807056427, + "logps/generated": -968.9459228515625, + "logps/real": -141.3875732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -56.779457092285156, + "rewards/margins": 56.22953414916992, + "rewards/real": -0.5499221682548523, + "step": 2630 + }, + { + "epoch": 1.69, + "learning_rate": 2.4277251184834124e-07, + "logits/generated": -0.4236997663974762, + "logits/real": -0.6387981176376343, + "logps/generated": -1061.720947265625, + "logps/real": -142.55154418945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.07857513427734, + "rewards/margins": 64.67992401123047, + "rewards/real": -0.39864128828048706, + "step": 2640 + }, + { + "epoch": 1.7, + "learning_rate": 2.4158767772511846e-07, + "logits/generated": -0.4931492805480957, + "logits/real": -0.6415807008743286, + "logps/generated": -936.0066528320312, + "logps/real": -148.57513427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.94257736206055, + "rewards/margins": 52.371917724609375, + "rewards/real": -0.5706599950790405, + "step": 2650 + }, + { + "epoch": 1.7, + "learning_rate": 2.4040284360189573e-07, + "logits/generated": -0.463541179895401, + "logits/real": -0.642052412033081, + "logps/generated": -989.3040771484375, + "logps/real": -163.30599975585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -58.35044479370117, + "rewards/margins": 57.896202087402344, + "rewards/real": -0.4542439877986908, + "step": 2660 + }, + { + "epoch": 1.71, + "learning_rate": 2.39218009478673e-07, + "logits/generated": -0.43307337164878845, + "logits/real": -0.7004517316818237, + "logps/generated": -1077.06884765625, + "logps/real": -144.35037231445312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -64.49039459228516, + "rewards/margins": 63.91597366333008, + "rewards/real": -0.5744192004203796, + "step": 2670 + }, + { + "epoch": 1.71, + "learning_rate": 2.3803317535545023e-07, + "logits/generated": -0.4095051884651184, + "logits/real": -0.6760072112083435, + "logps/generated": -931.9710693359375, + "logps/real": -144.9515380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -55.00127029418945, + "rewards/margins": 54.52852249145508, + "rewards/real": -0.4727482795715332, + "step": 2680 + }, + { + "epoch": 1.72, + "learning_rate": 2.3684834123222747e-07, + "logits/generated": -0.4472767412662506, + "logits/real": -0.5491870641708374, + "logps/generated": -999.7566528320312, + "logps/real": -166.29904174804688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.471107482910156, + "rewards/margins": 58.70185089111328, + "rewards/real": -0.7692559361457825, + "step": 2690 + }, + { + "epoch": 1.73, + "learning_rate": 2.3566350710900475e-07, + "logits/generated": -0.41070666909217834, + "logits/real": -0.5636172890663147, + "logps/generated": -1065.25537109375, + "logps/real": -148.3241729736328, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.23228454589844, + "rewards/margins": 66.70288848876953, + "rewards/real": -0.5294026732444763, + "step": 2700 + }, + { + "epoch": 1.73, + "learning_rate": 2.3447867298578197e-07, + "logits/generated": -0.46938830614089966, + "logits/real": -0.667069137096405, + "logps/generated": -994.6066284179688, + "logps/real": -160.06761169433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.15541458129883, + "rewards/margins": 59.66267013549805, + "rewards/real": -0.49274301528930664, + "step": 2710 + }, + { + "epoch": 1.74, + "learning_rate": 2.3329383886255924e-07, + "logits/generated": -0.4746991991996765, + "logits/real": -0.6093307733535767, + "logps/generated": -992.4873046875, + "logps/real": -160.92660522460938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.234031677246094, + "rewards/margins": 58.680206298828125, + "rewards/real": -0.5538274049758911, + "step": 2720 + }, + { + "epoch": 1.75, + "learning_rate": 2.3210900473933649e-07, + "logits/generated": -0.4014422297477722, + "logits/real": -0.6751469373703003, + "logps/generated": -929.3016357421875, + "logps/real": -150.0366973876953, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -55.02751922607422, + "rewards/margins": 54.57123565673828, + "rewards/real": -0.4562840461730957, + "step": 2730 + }, + { + "epoch": 1.75, + "learning_rate": 2.3092417061611373e-07, + "logits/generated": -0.44085001945495605, + "logits/real": -0.6542856693267822, + "logps/generated": -1020.89404296875, + "logps/real": -162.69509887695312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.03368377685547, + "rewards/margins": 61.63750076293945, + "rewards/real": -0.3961876928806305, + "step": 2740 + }, + { + "epoch": 1.76, + "learning_rate": 2.2973933649289098e-07, + "logits/generated": -0.45564159750938416, + "logits/real": -0.701114296913147, + "logps/generated": -1048.5789794921875, + "logps/real": -124.47825622558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.25930786132812, + "rewards/margins": 64.69044494628906, + "rewards/real": -0.5688632130622864, + "step": 2750 + }, + { + "epoch": 1.77, + "learning_rate": 2.2855450236966822e-07, + "logits/generated": -0.5452786087989807, + "logits/real": -0.6592291593551636, + "logps/generated": -1069.452880859375, + "logps/real": -159.27734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.70073699951172, + "rewards/margins": 65.43223571777344, + "rewards/real": -0.26849886775016785, + "step": 2760 + }, + { + "epoch": 1.77, + "learning_rate": 2.273696682464455e-07, + "logits/generated": -0.46791744232177734, + "logits/real": -0.6787043213844299, + "logps/generated": -861.5823974609375, + "logps/real": -147.97743225097656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -45.79942321777344, + "rewards/margins": 45.491188049316406, + "rewards/real": -0.30823782086372375, + "step": 2770 + }, + { + "epoch": 1.78, + "learning_rate": 2.2618483412322272e-07, + "logits/generated": -0.41990095376968384, + "logits/real": -0.6832523345947266, + "logps/generated": -932.8555908203125, + "logps/real": -131.0704345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -52.52861785888672, + "rewards/margins": 52.190757751464844, + "rewards/real": -0.3378532826900482, + "step": 2780 + }, + { + "epoch": 1.79, + "learning_rate": 2.25e-07, + "logits/generated": -0.4100368916988373, + "logits/real": -0.5835294723510742, + "logps/generated": -1038.96630859375, + "logps/real": -184.62283325195312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.66301727294922, + "rewards/margins": 62.26544952392578, + "rewards/real": -0.39757412672042847, + "step": 2790 + }, + { + "epoch": 1.79, + "learning_rate": 2.2381516587677724e-07, + "logits/generated": -0.44541412591934204, + "logits/real": -0.6417989730834961, + "logps/generated": -942.4816284179688, + "logps/real": -173.33139038085938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -55.49482345581055, + "rewards/margins": 55.00482177734375, + "rewards/real": -0.49000295996665955, + "step": 2800 + }, + { + "epoch": 1.8, + "learning_rate": 2.226303317535545e-07, + "logits/generated": -0.44918951392173767, + "logits/real": -0.6756407618522644, + "logps/generated": -1055.998779296875, + "logps/real": -164.0789337158203, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -64.55057525634766, + "rewards/margins": 63.80998992919922, + "rewards/real": -0.7405800819396973, + "step": 2810 + }, + { + "epoch": 1.8, + "learning_rate": 2.2144549763033173e-07, + "logits/generated": -0.3896518051624298, + "logits/real": -0.6300621628761292, + "logps/generated": -971.3762817382812, + "logps/real": -116.8947525024414, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -56.8530158996582, + "rewards/margins": 56.00217819213867, + "rewards/real": -0.8508402705192566, + "step": 2820 + }, + { + "epoch": 1.81, + "learning_rate": 2.20260663507109e-07, + "logits/generated": -0.40912383794784546, + "logits/real": -0.571279764175415, + "logps/generated": -1047.6051025390625, + "logps/real": -176.30783081054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.89017486572266, + "rewards/margins": 67.03627014160156, + "rewards/real": -0.8538981676101685, + "step": 2830 + }, + { + "epoch": 1.82, + "learning_rate": 2.1907582938388625e-07, + "logits/generated": -0.3686346113681793, + "logits/real": -0.6175917387008667, + "logps/generated": -1123.7552490234375, + "logps/real": -157.916748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.57915496826172, + "rewards/margins": 69.48426055908203, + "rewards/real": -1.0949029922485352, + "step": 2840 + }, + { + "epoch": 1.82, + "learning_rate": 2.178909952606635e-07, + "logits/generated": -0.4202900826931, + "logits/real": -0.6336459517478943, + "logps/generated": -1137.767822265625, + "logps/real": -124.10030364990234, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.68309783935547, + "rewards/margins": 70.78590393066406, + "rewards/real": -0.8971970677375793, + "step": 2850 + }, + { + "epoch": 1.83, + "learning_rate": 2.1670616113744074e-07, + "logits/generated": -0.3994109034538269, + "logits/real": -0.634803831577301, + "logps/generated": -1005.7017822265625, + "logps/real": -169.7325439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.44282150268555, + "rewards/margins": 58.54929733276367, + "rewards/real": -0.8935245275497437, + "step": 2860 + }, + { + "epoch": 1.84, + "learning_rate": 2.15521327014218e-07, + "logits/generated": -0.3749557137489319, + "logits/real": -0.6287773251533508, + "logps/generated": -1105.1219482421875, + "logps/real": -147.91397094726562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.544677734375, + "rewards/margins": 68.75887298583984, + "rewards/real": -0.785801351070404, + "step": 2870 + }, + { + "epoch": 1.84, + "learning_rate": 2.1433649289099526e-07, + "logits/generated": -0.4119029641151428, + "logits/real": -0.5460438132286072, + "logps/generated": -1043.18310546875, + "logps/real": -165.08152770996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.2020263671875, + "rewards/margins": 62.2581672668457, + "rewards/real": -0.9438508749008179, + "step": 2880 + }, + { + "epoch": 1.85, + "learning_rate": 2.131516587677725e-07, + "logits/generated": -0.40759310126304626, + "logits/real": -0.5606673955917358, + "logps/generated": -1038.422119140625, + "logps/real": -154.99227905273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -61.8538818359375, + "rewards/margins": 61.08037567138672, + "rewards/real": -0.77350914478302, + "step": 2890 + }, + { + "epoch": 1.86, + "learning_rate": 2.1196682464454975e-07, + "logits/generated": -0.40789279341697693, + "logits/real": -0.6496740579605103, + "logps/generated": -1112.929931640625, + "logps/real": -132.43563842773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.68193054199219, + "rewards/margins": 69.83362579345703, + "rewards/real": -0.8483074903488159, + "step": 2900 + }, + { + "epoch": 1.86, + "learning_rate": 2.10781990521327e-07, + "logits/generated": -0.37787383794784546, + "logits/real": -0.5792855024337769, + "logps/generated": -992.4094848632812, + "logps/real": -154.80380249023438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.6590461730957, + "rewards/margins": 56.728843688964844, + "rewards/real": -0.9301955103874207, + "step": 2910 + }, + { + "epoch": 1.87, + "learning_rate": 2.0959715639810427e-07, + "logits/generated": -0.4275107979774475, + "logits/real": -0.6507991552352905, + "logps/generated": -1179.656982421875, + "logps/real": -151.2890167236328, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.20372009277344, + "rewards/margins": 75.1134262084961, + "rewards/real": -1.0902981758117676, + "step": 2920 + }, + { + "epoch": 1.87, + "learning_rate": 2.0841232227488152e-07, + "logits/generated": -0.4114235043525696, + "logits/real": -0.6667225360870361, + "logps/generated": -1071.714599609375, + "logps/real": -133.47251892089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.06950378417969, + "rewards/margins": 67.27735137939453, + "rewards/real": -0.7921562790870667, + "step": 2930 + }, + { + "epoch": 1.88, + "learning_rate": 2.0722748815165874e-07, + "logits/generated": -0.41574984788894653, + "logits/real": -0.6365878582000732, + "logps/generated": -1119.65087890625, + "logps/real": -142.26431274414062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.78968811035156, + "rewards/margins": 70.04450988769531, + "rewards/real": -0.745174765586853, + "step": 2940 + }, + { + "epoch": 1.89, + "learning_rate": 2.06042654028436e-07, + "logits/generated": -0.37902599573135376, + "logits/real": -0.5311517119407654, + "logps/generated": -1179.3172607421875, + "logps/real": -160.26150512695312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.18898010253906, + "rewards/margins": 72.45357513427734, + "rewards/real": -0.7354053258895874, + "step": 2950 + }, + { + "epoch": 1.89, + "learning_rate": 2.0485781990521326e-07, + "logits/generated": -0.3815317153930664, + "logits/real": -0.5580254793167114, + "logps/generated": -1023.4977416992188, + "logps/real": -177.36837768554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.73035430908203, + "rewards/margins": 61.690452575683594, + "rewards/real": -1.0398961305618286, + "step": 2960 + }, + { + "epoch": 1.9, + "learning_rate": 2.0367298578199053e-07, + "logits/generated": -0.44472736120224, + "logits/real": -0.6714332699775696, + "logps/generated": -1140.576416015625, + "logps/real": -164.40725708007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.26954650878906, + "rewards/margins": 71.59504699707031, + "rewards/real": -0.6745188236236572, + "step": 2970 + }, + { + "epoch": 1.91, + "learning_rate": 2.0248815165876775e-07, + "logits/generated": -0.4064870774745941, + "logits/real": -0.6100367903709412, + "logps/generated": -1150.54833984375, + "logps/real": -160.94125366210938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.72151184082031, + "rewards/margins": 74.99349975585938, + "rewards/real": -0.7280232906341553, + "step": 2980 + }, + { + "epoch": 1.91, + "learning_rate": 2.0130331753554502e-07, + "logits/generated": -0.4334731698036194, + "logits/real": -0.6273586750030518, + "logps/generated": -1114.251220703125, + "logps/real": -167.9752655029297, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.68013000488281, + "rewards/margins": 68.87593078613281, + "rewards/real": -0.8041984438896179, + "step": 2990 + }, + { + "epoch": 1.92, + "learning_rate": 2.0011848341232227e-07, + "logits/generated": -0.37390297651290894, + "logits/real": -0.5925924777984619, + "logps/generated": -1171.7125244140625, + "logps/real": -151.46539306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.30975341796875, + "rewards/margins": 75.49763488769531, + "rewards/real": -0.8121153712272644, + "step": 3000 + }, + { + "epoch": 1.93, + "learning_rate": 1.9893364928909952e-07, + "logits/generated": -0.3266315758228302, + "logits/real": -0.5822888612747192, + "logps/generated": -1063.568115234375, + "logps/real": -150.86004638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -64.58686828613281, + "rewards/margins": 63.89935302734375, + "rewards/real": -0.6875194311141968, + "step": 3010 + }, + { + "epoch": 1.93, + "learning_rate": 1.9774881516587676e-07, + "logits/generated": -0.38277262449264526, + "logits/real": -0.5801711678504944, + "logps/generated": -1175.657470703125, + "logps/real": -141.32138061523438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.63087463378906, + "rewards/margins": 76.90750122070312, + "rewards/real": -0.7233678102493286, + "step": 3020 + }, + { + "epoch": 1.94, + "learning_rate": 1.96563981042654e-07, + "logits/generated": -0.35185354948043823, + "logits/real": -0.6176477670669556, + "logps/generated": -1025.0194091796875, + "logps/real": -130.98770141601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.04143524169922, + "rewards/margins": 61.38140106201172, + "rewards/real": -0.6600403189659119, + "step": 3030 + }, + { + "epoch": 1.94, + "learning_rate": 1.9537914691943128e-07, + "logits/generated": -0.40409189462661743, + "logits/real": -0.596308708190918, + "logps/generated": -1096.9437255859375, + "logps/real": -167.45077514648438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.10643768310547, + "rewards/margins": 67.09183502197266, + "rewards/real": -1.014599084854126, + "step": 3040 + }, + { + "epoch": 1.95, + "learning_rate": 1.9419431279620853e-07, + "logits/generated": -0.356533408164978, + "logits/real": -0.5840550661087036, + "logps/generated": -1120.258056640625, + "logps/real": -153.67636108398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.5401840209961, + "rewards/margins": 70.77323913574219, + "rewards/real": -0.7669495940208435, + "step": 3050 + }, + { + "epoch": 1.96, + "learning_rate": 1.9300947867298577e-07, + "logits/generated": -0.39099499583244324, + "logits/real": -0.5445195436477661, + "logps/generated": -1096.966552734375, + "logps/real": -185.20242309570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.42916107177734, + "rewards/margins": 67.67522430419922, + "rewards/real": -0.7539411187171936, + "step": 3060 + }, + { + "epoch": 1.96, + "learning_rate": 1.9182464454976302e-07, + "logits/generated": -0.3674711287021637, + "logits/real": -0.6313090920448303, + "logps/generated": -998.6416015625, + "logps/real": -132.3099365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -59.564292907714844, + "rewards/margins": 58.874427795410156, + "rewards/real": -0.6898680329322815, + "step": 3070 + }, + { + "epoch": 1.97, + "learning_rate": 1.906398104265403e-07, + "logits/generated": -0.3422473669052124, + "logits/real": -0.5261892676353455, + "logps/generated": -1169.02880859375, + "logps/real": -149.87451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.00128173828125, + "rewards/margins": 75.43621063232422, + "rewards/real": -0.5650706887245178, + "step": 3080 + }, + { + "epoch": 1.98, + "learning_rate": 1.8945497630331754e-07, + "logits/generated": -0.40371593832969666, + "logits/real": -0.5681861042976379, + "logps/generated": -989.8043823242188, + "logps/real": -161.1966552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -60.001930236816406, + "rewards/margins": 59.34397506713867, + "rewards/real": -0.6579534411430359, + "step": 3090 + }, + { + "epoch": 1.98, + "learning_rate": 1.8827014218009476e-07, + "logits/generated": -0.3659510016441345, + "logits/real": -0.5965005159378052, + "logps/generated": -1124.320068359375, + "logps/real": -151.7841033935547, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.14144897460938, + "rewards/margins": 72.28865051269531, + "rewards/real": -0.8528071641921997, + "step": 3100 + }, + { + "epoch": 1.99, + "learning_rate": 1.8708530805687203e-07, + "logits/generated": -0.33026427030563354, + "logits/real": -0.5088328719139099, + "logps/generated": -1115.7996826171875, + "logps/real": -135.47061157226562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.01557159423828, + "rewards/margins": 70.09529113769531, + "rewards/real": -0.9202736020088196, + "step": 3110 + }, + { + "epoch": 2.0, + "learning_rate": 1.8590047393364928e-07, + "logits/generated": -0.35965341329574585, + "logits/real": -0.5917715430259705, + "logps/generated": -1188.393310546875, + "logps/real": -164.9335174560547, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -79.32763671875, + "rewards/margins": 78.70462036132812, + "rewards/real": -0.623020589351654, + "step": 3120 + }, + { + "epoch": 2.0, + "learning_rate": 1.8471563981042655e-07, + "logits/generated": -0.3648239076137543, + "logits/real": -0.5664030313491821, + "logps/generated": -1057.7745361328125, + "logps/real": -164.2882080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.43244171142578, + "rewards/margins": 64.76323699951172, + "rewards/real": -0.6691963076591492, + "step": 3130 + }, + { + "epoch": 2.01, + "learning_rate": 1.8353080568720377e-07, + "logits/generated": -0.2945231795310974, + "logits/real": -0.539868950843811, + "logps/generated": -1135.911865234375, + "logps/real": -126.64066314697266, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.74871063232422, + "rewards/margins": 74.0860595703125, + "rewards/real": -0.6626566648483276, + "step": 3140 + }, + { + "epoch": 2.02, + "learning_rate": 1.8234597156398104e-07, + "logits/generated": -0.32069313526153564, + "logits/real": -0.5126243829727173, + "logps/generated": -1169.849365234375, + "logps/real": -147.9727783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.32138061523438, + "rewards/margins": 75.6849136352539, + "rewards/real": -0.6364642381668091, + "step": 3150 + }, + { + "epoch": 2.02, + "learning_rate": 1.811611374407583e-07, + "logits/generated": -0.3039155900478363, + "logits/real": -0.5434777140617371, + "logps/generated": -1120.1385498046875, + "logps/real": -150.61309814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.19499206542969, + "rewards/margins": 71.50361633300781, + "rewards/real": -0.6913769841194153, + "step": 3160 + }, + { + "epoch": 2.03, + "learning_rate": 1.7997630331753554e-07, + "logits/generated": -0.33049115538597107, + "logits/real": -0.5453459620475769, + "logps/generated": -1100.5694580078125, + "logps/real": -168.0435791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.0591812133789, + "rewards/margins": 69.451904296875, + "rewards/real": -0.6072803139686584, + "step": 3170 + }, + { + "epoch": 2.03, + "learning_rate": 1.7879146919431278e-07, + "logits/generated": -0.3514329493045807, + "logits/real": -0.5001763105392456, + "logps/generated": -1134.4805908203125, + "logps/real": -180.07357788085938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.09115600585938, + "rewards/margins": 72.2973403930664, + "rewards/real": -0.793817400932312, + "step": 3180 + }, + { + "epoch": 2.04, + "learning_rate": 1.7760663507109003e-07, + "logits/generated": -0.27006787061691284, + "logits/real": -0.4615755081176758, + "logps/generated": -1064.807861328125, + "logps/real": -157.30935668945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.96697998046875, + "rewards/margins": 67.21857452392578, + "rewards/real": -0.7484084963798523, + "step": 3190 + }, + { + "epoch": 2.05, + "learning_rate": 1.764218009478673e-07, + "logits/generated": -0.33314403891563416, + "logits/real": -0.5476816892623901, + "logps/generated": -1177.116943359375, + "logps/real": -133.23471069335938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -78.40288543701172, + "rewards/margins": 77.6571044921875, + "rewards/real": -0.7457820177078247, + "step": 3200 + }, + { + "epoch": 2.05, + "learning_rate": 1.7523696682464452e-07, + "logits/generated": -0.3331630825996399, + "logits/real": -0.5389949083328247, + "logps/generated": -1134.275146484375, + "logps/real": -167.80189514160156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.55030059814453, + "rewards/margins": 69.66114807128906, + "rewards/real": -0.8891481161117554, + "step": 3210 + }, + { + "epoch": 2.06, + "learning_rate": 1.740521327014218e-07, + "logits/generated": -0.3793897330760956, + "logits/real": -0.5896502733230591, + "logps/generated": -1118.945068359375, + "logps/real": -142.81280517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.79044342041016, + "rewards/margins": 71.0679702758789, + "rewards/real": -0.7224776148796082, + "step": 3220 + }, + { + "epoch": 2.07, + "learning_rate": 1.7286729857819904e-07, + "logits/generated": -0.34748396277427673, + "logits/real": -0.5819012522697449, + "logps/generated": -1131.2802734375, + "logps/real": -144.39434814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.58968353271484, + "rewards/margins": 70.8386001586914, + "rewards/real": -0.7510883212089539, + "step": 3230 + }, + { + "epoch": 2.07, + "learning_rate": 1.7168246445497631e-07, + "logits/generated": -0.35045960545539856, + "logits/real": -0.5766857862472534, + "logps/generated": -1157.075927734375, + "logps/real": -183.49752807617188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.64195251464844, + "rewards/margins": 73.91766357421875, + "rewards/real": -0.7242866158485413, + "step": 3240 + }, + { + "epoch": 2.08, + "learning_rate": 1.7049763033175353e-07, + "logits/generated": -0.36225640773773193, + "logits/real": -0.6122447848320007, + "logps/generated": -1231.66064453125, + "logps/real": -159.02023315429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -81.8363037109375, + "rewards/margins": 81.19940185546875, + "rewards/real": -0.636897623538971, + "step": 3250 + }, + { + "epoch": 2.09, + "learning_rate": 1.693127962085308e-07, + "logits/generated": -0.3631829619407654, + "logits/real": -0.5321738123893738, + "logps/generated": -1131.0858154296875, + "logps/real": -174.25485229492188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.96878051757812, + "rewards/margins": 70.31332397460938, + "rewards/real": -0.6554469466209412, + "step": 3260 + }, + { + "epoch": 2.09, + "learning_rate": 1.6812796208530805e-07, + "logits/generated": -0.2980991005897522, + "logits/real": -0.4874647557735443, + "logps/generated": -1105.4827880859375, + "logps/real": -152.40386962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -66.46055603027344, + "rewards/margins": 65.54535675048828, + "rewards/real": -0.9151935577392578, + "step": 3270 + }, + { + "epoch": 2.1, + "learning_rate": 1.669431279620853e-07, + "logits/generated": -0.28592294454574585, + "logits/real": -0.4887320101261139, + "logps/generated": -1115.578369140625, + "logps/real": -185.43649291992188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.00923156738281, + "rewards/margins": 69.16118621826172, + "rewards/real": -0.8480373620986938, + "step": 3280 + }, + { + "epoch": 2.1, + "learning_rate": 1.6575829383886255e-07, + "logits/generated": -0.37043094635009766, + "logits/real": -0.579189658164978, + "logps/generated": -1242.7139892578125, + "logps/real": -150.39120483398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -83.98506927490234, + "rewards/margins": 83.05586242675781, + "rewards/real": -0.9292176961898804, + "step": 3290 + }, + { + "epoch": 2.11, + "learning_rate": 1.645734597156398e-07, + "logits/generated": -0.28565752506256104, + "logits/real": -0.5570210218429565, + "logps/generated": -1072.3984375, + "logps/real": -139.39593505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.31278991699219, + "rewards/margins": 68.64981079101562, + "rewards/real": -0.6629735231399536, + "step": 3300 + }, + { + "epoch": 2.12, + "learning_rate": 1.6338862559241706e-07, + "logits/generated": -0.36683911085128784, + "logits/real": -0.6102225184440613, + "logps/generated": -1144.6790771484375, + "logps/real": -150.28924560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.97947692871094, + "rewards/margins": 74.34928894042969, + "rewards/real": -0.6301820278167725, + "step": 3310 + }, + { + "epoch": 2.12, + "learning_rate": 1.622037914691943e-07, + "logits/generated": -0.3236832916736603, + "logits/real": -0.5270904302597046, + "logps/generated": -1184.956787109375, + "logps/real": -147.99168395996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.87047576904297, + "rewards/margins": 77.00102233886719, + "rewards/real": -0.8694450259208679, + "step": 3320 + }, + { + "epoch": 2.13, + "learning_rate": 1.6101895734597156e-07, + "logits/generated": -0.373867928981781, + "logits/real": -0.5834953784942627, + "logps/generated": -1108.7701416015625, + "logps/real": -163.93594360351562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.46839141845703, + "rewards/margins": 69.51911926269531, + "rewards/real": -0.9492788314819336, + "step": 3330 + }, + { + "epoch": 2.14, + "learning_rate": 1.598341232227488e-07, + "logits/generated": -0.3623445928096771, + "logits/real": -0.5558930039405823, + "logps/generated": -1033.65771484375, + "logps/real": -145.7788848876953, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -63.11626434326172, + "rewards/margins": 62.55774688720703, + "rewards/real": -0.5585171580314636, + "step": 3340 + }, + { + "epoch": 2.14, + "learning_rate": 1.5864928909952605e-07, + "logits/generated": -0.3441459834575653, + "logits/real": -0.5500830411911011, + "logps/generated": -1076.542724609375, + "logps/real": -142.98782348632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.62294006347656, + "rewards/margins": 69.04898071289062, + "rewards/real": -0.5739551186561584, + "step": 3350 + }, + { + "epoch": 2.15, + "learning_rate": 1.5746445497630332e-07, + "logits/generated": -0.32716110348701477, + "logits/real": -0.5438711047172546, + "logps/generated": -1147.177978515625, + "logps/real": -145.17544555664062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.67416381835938, + "rewards/margins": 71.88451385498047, + "rewards/real": -0.7896552085876465, + "step": 3360 + }, + { + "epoch": 2.16, + "learning_rate": 1.5627962085308054e-07, + "logits/generated": -0.3573629856109619, + "logits/real": -0.5217954516410828, + "logps/generated": -1130.4320068359375, + "logps/real": -172.75650024414062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.90027618408203, + "rewards/margins": 71.09027862548828, + "rewards/real": -0.8099902868270874, + "step": 3370 + }, + { + "epoch": 2.16, + "learning_rate": 1.5509478672985782e-07, + "logits/generated": -0.34415721893310547, + "logits/real": -0.5328197479248047, + "logps/generated": -956.2267456054688, + "logps/real": -184.76602172851562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -57.68220901489258, + "rewards/margins": 56.873924255371094, + "rewards/real": -0.8082860708236694, + "step": 3380 + }, + { + "epoch": 2.17, + "learning_rate": 1.5390995260663506e-07, + "logits/generated": -0.3490820825099945, + "logits/real": -0.5200581550598145, + "logps/generated": -1079.313720703125, + "logps/real": -158.89695739746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.39738464355469, + "rewards/margins": 67.54957580566406, + "rewards/real": -0.8478103876113892, + "step": 3390 + }, + { + "epoch": 2.18, + "learning_rate": 1.5272511848341233e-07, + "logits/generated": -0.32425522804260254, + "logits/real": -0.5338164567947388, + "logps/generated": -1100.94873046875, + "logps/real": -160.40975952148438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.0157699584961, + "rewards/margins": 68.4029312133789, + "rewards/real": -0.6128430366516113, + "step": 3400 + }, + { + "epoch": 2.18, + "learning_rate": 1.5154028436018955e-07, + "logits/generated": -0.3767894208431244, + "logits/real": -0.5962772965431213, + "logps/generated": -1119.630615234375, + "logps/real": -164.57717895507812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.62731170654297, + "rewards/margins": 69.80793762207031, + "rewards/real": -0.8193785548210144, + "step": 3410 + }, + { + "epoch": 2.19, + "learning_rate": 1.5035545023696683e-07, + "logits/generated": -0.3327923119068146, + "logits/real": -0.5231298804283142, + "logps/generated": -1091.9482421875, + "logps/real": -159.0697021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.09928894042969, + "rewards/margins": 66.29541015625, + "rewards/real": -0.8038908243179321, + "step": 3420 + }, + { + "epoch": 2.19, + "learning_rate": 1.4917061611374407e-07, + "logits/generated": -0.37766391038894653, + "logits/real": -0.5465415716171265, + "logps/generated": -1140.1834716796875, + "logps/real": -176.43777465820312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.69719696044922, + "rewards/margins": 72.85856628417969, + "rewards/real": -0.8386209607124329, + "step": 3430 + }, + { + "epoch": 2.2, + "learning_rate": 1.4798578199052132e-07, + "logits/generated": -0.3288564682006836, + "logits/real": -0.6394492983818054, + "logps/generated": -1092.682861328125, + "logps/real": -138.92721557617188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.9786148071289, + "rewards/margins": 68.30329895019531, + "rewards/real": -0.6753060817718506, + "step": 3440 + }, + { + "epoch": 2.21, + "learning_rate": 1.4680094786729857e-07, + "logits/generated": -0.3352881371974945, + "logits/real": -0.5174044370651245, + "logps/generated": -1148.893798828125, + "logps/real": -157.83375549316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.80946350097656, + "rewards/margins": 74.113037109375, + "rewards/real": -0.6964321136474609, + "step": 3450 + }, + { + "epoch": 2.21, + "learning_rate": 1.456161137440758e-07, + "logits/generated": -0.38338786363601685, + "logits/real": -0.6143153309822083, + "logps/generated": -1172.7335205078125, + "logps/real": -150.6049041748047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.13297271728516, + "rewards/margins": 73.13155364990234, + "rewards/real": -1.0014207363128662, + "step": 3460 + }, + { + "epoch": 2.22, + "learning_rate": 1.4443127962085309e-07, + "logits/generated": -0.34384411573410034, + "logits/real": -0.585628867149353, + "logps/generated": -1103.96923828125, + "logps/real": -153.5775604248047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.13743591308594, + "rewards/margins": 69.50370025634766, + "rewards/real": -0.6337412595748901, + "step": 3470 + }, + { + "epoch": 2.23, + "learning_rate": 1.4324644549763033e-07, + "logits/generated": -0.34684544801712036, + "logits/real": -0.6211697459220886, + "logps/generated": -1069.9642333984375, + "logps/real": -169.10153198242188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.88719177246094, + "rewards/margins": 67.12832641601562, + "rewards/real": -0.7588711380958557, + "step": 3480 + }, + { + "epoch": 2.23, + "learning_rate": 1.4206161137440758e-07, + "logits/generated": -0.40978360176086426, + "logits/real": -0.5079740881919861, + "logps/generated": -1203.1060791015625, + "logps/real": -182.2041778564453, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -81.18981170654297, + "rewards/margins": 80.14823913574219, + "rewards/real": -1.0415685176849365, + "step": 3490 + }, + { + "epoch": 2.24, + "learning_rate": 1.4087677725118482e-07, + "logits/generated": -0.3429732024669647, + "logits/real": -0.6365989446640015, + "logps/generated": -1202.0032958984375, + "logps/real": -135.09603881835938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -79.1153793334961, + "rewards/margins": 78.39237976074219, + "rewards/real": -0.7229984998703003, + "step": 3500 + }, + { + "epoch": 2.25, + "learning_rate": 1.396919431279621e-07, + "logits/generated": -0.3157784044742584, + "logits/real": -0.5285521149635315, + "logps/generated": -1134.604248046875, + "logps/real": -162.54391479492188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.02388763427734, + "rewards/margins": 71.22061157226562, + "rewards/real": -0.8032848238945007, + "step": 3510 + }, + { + "epoch": 2.25, + "learning_rate": 1.3850710900473934e-07, + "logits/generated": -0.38203898072242737, + "logits/real": -0.549940824508667, + "logps/generated": -1131.8427734375, + "logps/real": -152.9811248779297, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.25450897216797, + "rewards/margins": 71.60226440429688, + "rewards/real": -0.6522516012191772, + "step": 3520 + }, + { + "epoch": 2.26, + "learning_rate": 1.3732227488151656e-07, + "logits/generated": -0.3488486707210541, + "logits/real": -0.39327472448349, + "logps/generated": -1084.085205078125, + "logps/real": -202.0915985107422, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.83716583251953, + "rewards/margins": 67.02722930908203, + "rewards/real": -0.8099360466003418, + "step": 3530 + }, + { + "epoch": 2.26, + "learning_rate": 1.3613744075829384e-07, + "logits/generated": -0.29847994446754456, + "logits/real": -0.5526587963104248, + "logps/generated": -1222.8707275390625, + "logps/real": -151.45326232910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -84.52772521972656, + "rewards/margins": 83.77813720703125, + "rewards/real": -0.7495924234390259, + "step": 3540 + }, + { + "epoch": 2.27, + "learning_rate": 1.3495260663507108e-07, + "logits/generated": -0.3559364676475525, + "logits/real": -0.5464509725570679, + "logps/generated": -1149.019775390625, + "logps/real": -144.06759643554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.87704467773438, + "rewards/margins": 72.91664123535156, + "rewards/real": -0.9604147672653198, + "step": 3550 + }, + { + "epoch": 2.28, + "learning_rate": 1.3376777251184836e-07, + "logits/generated": -0.3057738244533539, + "logits/real": -0.5685640573501587, + "logps/generated": -1133.9283447265625, + "logps/real": -161.63894653320312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.35575103759766, + "rewards/margins": 73.46749114990234, + "rewards/real": -0.8882623910903931, + "step": 3560 + }, + { + "epoch": 2.28, + "learning_rate": 1.3258293838862558e-07, + "logits/generated": -0.3922407031059265, + "logits/real": -0.6674994230270386, + "logps/generated": -1132.536865234375, + "logps/real": -146.58091735839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.54347229003906, + "rewards/margins": 70.5415267944336, + "rewards/real": -1.0019347667694092, + "step": 3570 + }, + { + "epoch": 2.29, + "learning_rate": 1.3139810426540285e-07, + "logits/generated": -0.33480846881866455, + "logits/real": -0.5054049491882324, + "logps/generated": -1200.821044921875, + "logps/real": -152.25894165039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.86302185058594, + "rewards/margins": 77.09458923339844, + "rewards/real": -0.7684418559074402, + "step": 3580 + }, + { + "epoch": 2.3, + "learning_rate": 1.302132701421801e-07, + "logits/generated": -0.2758890986442566, + "logits/real": -0.5493889451026917, + "logps/generated": -1093.770751953125, + "logps/real": -157.31973266601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.96113586425781, + "rewards/margins": 69.14842987060547, + "rewards/real": -0.8127008676528931, + "step": 3590 + }, + { + "epoch": 2.3, + "learning_rate": 1.2902843601895734e-07, + "logits/generated": -0.3500101566314697, + "logits/real": -0.5593122243881226, + "logps/generated": -1206.6497802734375, + "logps/real": -159.24282836914062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -82.6231689453125, + "rewards/margins": 81.7955551147461, + "rewards/real": -0.8276035189628601, + "step": 3600 + }, + { + "epoch": 2.31, + "learning_rate": 1.278436018957346e-07, + "logits/generated": -0.3821162283420563, + "logits/real": -0.6203012466430664, + "logps/generated": -1269.4385986328125, + "logps/real": -147.83021545410156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -88.19657897949219, + "rewards/margins": 87.15613555908203, + "rewards/real": -1.0404458045959473, + "step": 3610 + }, + { + "epoch": 2.32, + "learning_rate": 1.2665876777251183e-07, + "logits/generated": -0.4196929931640625, + "logits/real": -0.6896005868911743, + "logps/generated": -1157.632568359375, + "logps/real": -161.3779754638672, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.62742614746094, + "rewards/margins": 74.22064208984375, + "rewards/real": -0.40678733587265015, + "step": 3620 + }, + { + "epoch": 2.32, + "learning_rate": 1.254739336492891e-07, + "logits/generated": -0.46330398321151733, + "logits/real": -0.6500687003135681, + "logps/generated": -1221.859130859375, + "logps/real": -170.46438598632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -80.08015441894531, + "rewards/margins": 79.51933288574219, + "rewards/real": -0.5608130693435669, + "step": 3630 + }, + { + "epoch": 2.33, + "learning_rate": 1.2428909952606635e-07, + "logits/generated": -0.4650397300720215, + "logits/real": -0.6424258351325989, + "logps/generated": -1196.016357421875, + "logps/real": -148.45826721191406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.94972229003906, + "rewards/margins": 76.64620208740234, + "rewards/real": -0.3035140633583069, + "step": 3640 + }, + { + "epoch": 2.34, + "learning_rate": 1.231042654028436e-07, + "logits/generated": -0.4637749195098877, + "logits/real": -0.6913520097732544, + "logps/generated": -1081.8062744140625, + "logps/real": -143.20822143554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.7381820678711, + "rewards/margins": 68.34100341796875, + "rewards/real": -0.39719128608703613, + "step": 3650 + }, + { + "epoch": 2.34, + "learning_rate": 1.2191943127962085e-07, + "logits/generated": -0.4392772614955902, + "logits/real": -0.5955749750137329, + "logps/generated": -1123.980712890625, + "logps/real": -200.45895385742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.12271881103516, + "rewards/margins": 71.51152038574219, + "rewards/real": -0.6112003326416016, + "step": 3660 + }, + { + "epoch": 2.35, + "learning_rate": 1.207345971563981e-07, + "logits/generated": -0.43859052658081055, + "logits/real": -0.5733956098556519, + "logps/generated": -1162.6119384765625, + "logps/real": -168.77096557617188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.10337829589844, + "rewards/margins": 76.67765045166016, + "rewards/real": -0.42572155594825745, + "step": 3670 + }, + { + "epoch": 2.35, + "learning_rate": 1.1954976303317534e-07, + "logits/generated": -0.47363168001174927, + "logits/real": -0.6165460348129272, + "logps/generated": -1090.505859375, + "logps/real": -164.0859832763672, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.29020690917969, + "rewards/margins": 69.48956298828125, + "rewards/real": -0.8006424903869629, + "step": 3680 + }, + { + "epoch": 2.36, + "learning_rate": 1.183649289099526e-07, + "logits/generated": -0.41170722246170044, + "logits/real": -0.6370391249656677, + "logps/generated": -1108.5679931640625, + "logps/real": -160.58328247070312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.86441040039062, + "rewards/margins": 71.31538391113281, + "rewards/real": -0.549027681350708, + "step": 3690 + }, + { + "epoch": 2.37, + "learning_rate": 1.1718009478672986e-07, + "logits/generated": -0.379972368478775, + "logits/real": -0.6687403321266174, + "logps/generated": -1153.8831787109375, + "logps/real": -136.22607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.22931671142578, + "rewards/margins": 74.69772338867188, + "rewards/real": -0.5315843820571899, + "step": 3700 + }, + { + "epoch": 2.37, + "learning_rate": 1.159952606635071e-07, + "logits/generated": -0.39145171642303467, + "logits/real": -0.5640333294868469, + "logps/generated": -1174.184326171875, + "logps/real": -156.5506591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.92201232910156, + "rewards/margins": 76.40318298339844, + "rewards/real": -0.5188380479812622, + "step": 3710 + }, + { + "epoch": 2.38, + "learning_rate": 1.1481042654028436e-07, + "logits/generated": -0.4219874441623688, + "logits/real": -0.6748846769332886, + "logps/generated": -1189.40771484375, + "logps/real": -150.1600799560547, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.53298950195312, + "rewards/margins": 76.97785949707031, + "rewards/real": -0.5551234483718872, + "step": 3720 + }, + { + "epoch": 2.39, + "learning_rate": 1.136255924170616e-07, + "logits/generated": -0.40171951055526733, + "logits/real": -0.5924757122993469, + "logps/generated": -1085.5384521484375, + "logps/real": -151.51275634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.87525177001953, + "rewards/margins": 69.43013763427734, + "rewards/real": -0.4451100826263428, + "step": 3730 + }, + { + "epoch": 2.39, + "learning_rate": 1.1244075829383886e-07, + "logits/generated": -0.46468549966812134, + "logits/real": -0.6485167145729065, + "logps/generated": -1184.935546875, + "logps/real": -141.2152099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.06682586669922, + "rewards/margins": 76.591064453125, + "rewards/real": -0.47575807571411133, + "step": 3740 + }, + { + "epoch": 2.4, + "learning_rate": 1.112559241706161e-07, + "logits/generated": -0.4084410071372986, + "logits/real": -0.5791139602661133, + "logps/generated": -1158.128173828125, + "logps/real": -140.56553649902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.33256530761719, + "rewards/margins": 73.85154724121094, + "rewards/real": -0.48102912306785583, + "step": 3750 + }, + { + "epoch": 2.41, + "learning_rate": 1.1007109004739336e-07, + "logits/generated": -0.3835846781730652, + "logits/real": -0.6427528262138367, + "logps/generated": -1111.537109375, + "logps/real": -120.40766906738281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.03950500488281, + "rewards/margins": 72.73836517333984, + "rewards/real": -0.3011349141597748, + "step": 3760 + }, + { + "epoch": 2.41, + "learning_rate": 1.0888625592417061e-07, + "logits/generated": -0.38520628213882446, + "logits/real": -0.5476213693618774, + "logps/generated": -1175.740966796875, + "logps/real": -150.56874084472656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.73905944824219, + "rewards/margins": 76.24748992919922, + "rewards/real": -0.4915708601474762, + "step": 3770 + }, + { + "epoch": 2.42, + "learning_rate": 1.0770142180094787e-07, + "logits/generated": -0.4786599576473236, + "logits/real": -0.6190992593765259, + "logps/generated": -1177.5657958984375, + "logps/real": -171.1073455810547, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.37752532958984, + "rewards/margins": 74.78016662597656, + "rewards/real": -0.5973631143569946, + "step": 3780 + }, + { + "epoch": 2.42, + "learning_rate": 1.0651658767772511e-07, + "logits/generated": -0.4746522307395935, + "logits/real": -0.6191864609718323, + "logps/generated": -1131.2122802734375, + "logps/real": -170.5677490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.63467407226562, + "rewards/margins": 70.15538024902344, + "rewards/real": -0.4793027341365814, + "step": 3790 + }, + { + "epoch": 2.43, + "learning_rate": 1.0533175355450237e-07, + "logits/generated": -0.4551950991153717, + "logits/real": -0.6525193452835083, + "logps/generated": -1122.6358642578125, + "logps/real": -159.02659606933594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.77278137207031, + "rewards/margins": 70.19547271728516, + "rewards/real": -0.5773108005523682, + "step": 3800 + }, + { + "epoch": 2.44, + "learning_rate": 1.0414691943127962e-07, + "logits/generated": -0.4288361668586731, + "logits/real": -0.6204260587692261, + "logps/generated": -1192.144775390625, + "logps/real": -163.85079956054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.51756286621094, + "rewards/margins": 74.76011657714844, + "rewards/real": -0.7574476003646851, + "step": 3810 + }, + { + "epoch": 2.44, + "learning_rate": 1.0296208530805687e-07, + "logits/generated": -0.4227616786956787, + "logits/real": -0.5756683945655823, + "logps/generated": -1093.92529296875, + "logps/real": -168.95413208007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.02093505859375, + "rewards/margins": 66.26923370361328, + "rewards/real": -0.7517085671424866, + "step": 3820 + }, + { + "epoch": 2.45, + "learning_rate": 1.0177725118483411e-07, + "logits/generated": -0.400717556476593, + "logits/real": -0.5438157320022583, + "logps/generated": -1066.676025390625, + "logps/real": -157.6385040283203, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -66.5630874633789, + "rewards/margins": 65.72784423828125, + "rewards/real": -0.8352400064468384, + "step": 3830 + }, + { + "epoch": 2.46, + "learning_rate": 1.0059241706161137e-07, + "logits/generated": -0.394951194524765, + "logits/real": -0.5915923118591309, + "logps/generated": -1111.5635986328125, + "logps/real": -151.64712524414062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.04698181152344, + "rewards/margins": 70.56211853027344, + "rewards/real": -0.4848620295524597, + "step": 3840 + }, + { + "epoch": 2.46, + "learning_rate": 9.940758293838862e-08, + "logits/generated": -0.3858771026134491, + "logits/real": -0.6399182081222534, + "logps/generated": -1137.333740234375, + "logps/real": -133.06719970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.79722595214844, + "rewards/margins": 71.40068054199219, + "rewards/real": -0.3965340256690979, + "step": 3850 + }, + { + "epoch": 2.47, + "learning_rate": 9.822274881516588e-08, + "logits/generated": -0.40112823247909546, + "logits/real": -0.6340306401252747, + "logps/generated": -1152.3155517578125, + "logps/real": -146.1147918701172, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.04869079589844, + "rewards/margins": 74.36548614501953, + "rewards/real": -0.6832191944122314, + "step": 3860 + }, + { + "epoch": 2.48, + "learning_rate": 9.703791469194312e-08, + "logits/generated": -0.3981134295463562, + "logits/real": -0.615269660949707, + "logps/generated": -1170.2542724609375, + "logps/real": -125.6851577758789, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.76252746582031, + "rewards/margins": 77.17864990234375, + "rewards/real": -0.5838753581047058, + "step": 3870 + }, + { + "epoch": 2.48, + "learning_rate": 9.585308056872038e-08, + "logits/generated": -0.44058480858802795, + "logits/real": -0.5800412893295288, + "logps/generated": -1036.949951171875, + "logps/real": -154.7644500732422, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.33032989501953, + "rewards/margins": 64.915283203125, + "rewards/real": -0.41504526138305664, + "step": 3880 + }, + { + "epoch": 2.49, + "learning_rate": 9.466824644549763e-08, + "logits/generated": -0.4272391200065613, + "logits/real": -0.5816367864608765, + "logps/generated": -1235.927734375, + "logps/real": -135.92108154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -83.16920471191406, + "rewards/margins": 82.63643646240234, + "rewards/real": -0.5327636003494263, + "step": 3890 + }, + { + "epoch": 2.5, + "learning_rate": 9.348341232227488e-08, + "logits/generated": -0.45391201972961426, + "logits/real": -0.5931220054626465, + "logps/generated": -1152.2362060546875, + "logps/real": -149.9822540283203, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.86837768554688, + "rewards/margins": 74.22323608398438, + "rewards/real": -0.6451278924942017, + "step": 3900 + }, + { + "epoch": 2.5, + "learning_rate": 9.229857819905212e-08, + "logits/generated": -0.42041435837745667, + "logits/real": -0.5749475359916687, + "logps/generated": -1185.0155029296875, + "logps/real": -165.84779357910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.07542419433594, + "rewards/margins": 75.43386840820312, + "rewards/real": -0.6415479183197021, + "step": 3910 + }, + { + "epoch": 2.51, + "learning_rate": 9.111374407582938e-08, + "logits/generated": -0.3552473187446594, + "logits/real": -0.5435065031051636, + "logps/generated": -1080.6820068359375, + "logps/real": -149.6726837158203, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.79393005371094, + "rewards/margins": 67.18496704101562, + "rewards/real": -0.6089592576026917, + "step": 3920 + }, + { + "epoch": 2.51, + "learning_rate": 8.992890995260663e-08, + "logits/generated": -0.36526188254356384, + "logits/real": -0.542805552482605, + "logps/generated": -1186.941650390625, + "logps/real": -147.6852264404297, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.81428527832031, + "rewards/margins": 76.37767791748047, + "rewards/real": -0.4366043210029602, + "step": 3930 + }, + { + "epoch": 2.52, + "learning_rate": 8.874407582938389e-08, + "logits/generated": -0.38974112272262573, + "logits/real": -0.6490163207054138, + "logps/generated": -1125.834716796875, + "logps/real": -153.83132934570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.05818176269531, + "rewards/margins": 69.58064270019531, + "rewards/real": -0.4775339961051941, + "step": 3940 + }, + { + "epoch": 2.53, + "learning_rate": 8.755924170616114e-08, + "logits/generated": -0.38700738549232483, + "logits/real": -0.5742595195770264, + "logps/generated": -1097.8369140625, + "logps/real": -169.0182342529297, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.09275817871094, + "rewards/margins": 68.44459533691406, + "rewards/real": -0.648157000541687, + "step": 3950 + }, + { + "epoch": 2.53, + "learning_rate": 8.63744075829384e-08, + "logits/generated": -0.3959638476371765, + "logits/real": -0.6299315690994263, + "logps/generated": -1148.005859375, + "logps/real": -154.40768432617188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.98236846923828, + "rewards/margins": 74.33345794677734, + "rewards/real": -0.6489164233207703, + "step": 3960 + }, + { + "epoch": 2.54, + "learning_rate": 8.518957345971564e-08, + "logits/generated": -0.3766046166419983, + "logits/real": -0.5962399244308472, + "logps/generated": -1100.2664794921875, + "logps/real": -153.56520080566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.08140563964844, + "rewards/margins": 68.45283508300781, + "rewards/real": -0.6285830736160278, + "step": 3970 + }, + { + "epoch": 2.55, + "learning_rate": 8.40047393364929e-08, + "logits/generated": -0.41984719038009644, + "logits/real": -0.5362011790275574, + "logps/generated": -1218.0125732421875, + "logps/real": -143.78208923339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -80.89471435546875, + "rewards/margins": 80.50052642822266, + "rewards/real": -0.3941938281059265, + "step": 3980 + }, + { + "epoch": 2.55, + "learning_rate": 8.281990521327013e-08, + "logits/generated": -0.38275301456451416, + "logits/real": -0.6942519545555115, + "logps/generated": -1104.48828125, + "logps/real": -130.95724487304688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.27180480957031, + "rewards/margins": 68.72615051269531, + "rewards/real": -0.5456700921058655, + "step": 3990 + }, + { + "epoch": 2.56, + "learning_rate": 8.163507109004738e-08, + "logits/generated": -0.369687020778656, + "logits/real": -0.594490110874176, + "logps/generated": -1186.750732421875, + "logps/real": -140.19932556152344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.59791564941406, + "rewards/margins": 76.15618133544922, + "rewards/real": -0.44173464179039, + "step": 4000 + }, + { + "epoch": 2.57, + "learning_rate": 8.045023696682464e-08, + "logits/generated": -0.4018821120262146, + "logits/real": -0.6110813617706299, + "logps/generated": -1141.607177734375, + "logps/real": -150.42648315429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.14505767822266, + "rewards/margins": 72.56416320800781, + "rewards/real": -0.5808922052383423, + "step": 4010 + }, + { + "epoch": 2.57, + "learning_rate": 7.926540284360189e-08, + "logits/generated": -0.3948236405849457, + "logits/real": -0.6121063232421875, + "logps/generated": -1248.185302734375, + "logps/real": -166.86651611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -84.28129577636719, + "rewards/margins": 83.68611907958984, + "rewards/real": -0.5951663851737976, + "step": 4020 + }, + { + "epoch": 2.58, + "learning_rate": 7.808056872037915e-08, + "logits/generated": -0.40597429871559143, + "logits/real": -0.6436026692390442, + "logps/generated": -1255.7674560546875, + "logps/real": -145.66281127929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -84.02607727050781, + "rewards/margins": 83.40240478515625, + "rewards/real": -0.6236714124679565, + "step": 4030 + }, + { + "epoch": 2.58, + "learning_rate": 7.689573459715639e-08, + "logits/generated": -0.42012372612953186, + "logits/real": -0.5629431009292603, + "logps/generated": -1173.372314453125, + "logps/real": -168.88525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -79.46331024169922, + "rewards/margins": 78.79911804199219, + "rewards/real": -0.6641994714736938, + "step": 4040 + }, + { + "epoch": 2.59, + "learning_rate": 7.571090047393365e-08, + "logits/generated": -0.41295844316482544, + "logits/real": -0.6122807860374451, + "logps/generated": -1108.602783203125, + "logps/real": -137.6436767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.95465850830078, + "rewards/margins": 70.28472900390625, + "rewards/real": -0.6699261665344238, + "step": 4050 + }, + { + "epoch": 2.6, + "learning_rate": 7.45260663507109e-08, + "logits/generated": -0.4057750105857849, + "logits/real": -0.6003803610801697, + "logps/generated": -1285.003662109375, + "logps/real": -139.14450073242188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -89.37120056152344, + "rewards/margins": 88.8534927368164, + "rewards/real": -0.5177055597305298, + "step": 4060 + }, + { + "epoch": 2.6, + "learning_rate": 7.334123222748814e-08, + "logits/generated": -0.3600274324417114, + "logits/real": -0.5848634243011475, + "logps/generated": -1176.391357421875, + "logps/real": -151.9239959716797, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.63912200927734, + "rewards/margins": 76.92236328125, + "rewards/real": -0.716761589050293, + "step": 4070 + }, + { + "epoch": 2.61, + "learning_rate": 7.215639810426539e-08, + "logits/generated": -0.40445417165756226, + "logits/real": -0.6599612832069397, + "logps/generated": -1055.158203125, + "logps/real": -162.99879455566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.7494888305664, + "rewards/margins": 66.99449157714844, + "rewards/real": -0.7549879550933838, + "step": 4080 + }, + { + "epoch": 2.62, + "learning_rate": 7.097156398104265e-08, + "logits/generated": -0.368557870388031, + "logits/real": -0.47544288635253906, + "logps/generated": -1161.69970703125, + "logps/real": -145.6846923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.03729248046875, + "rewards/margins": 74.46896362304688, + "rewards/real": -0.5683245062828064, + "step": 4090 + }, + { + "epoch": 2.62, + "learning_rate": 6.97867298578199e-08, + "logits/generated": -0.4004407823085785, + "logits/real": -0.5904231667518616, + "logps/generated": -1147.255126953125, + "logps/real": -144.21981811523438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.1914291381836, + "rewards/margins": 73.42829895019531, + "rewards/real": -0.7631380558013916, + "step": 4100 + }, + { + "epoch": 2.63, + "learning_rate": 6.860189573459716e-08, + "logits/generated": -0.42303165793418884, + "logits/real": -0.6458145380020142, + "logps/generated": -1187.7353515625, + "logps/real": -157.00314331054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.56205749511719, + "rewards/margins": 76.46504211425781, + "rewards/real": -1.0970159769058228, + "step": 4110 + }, + { + "epoch": 2.64, + "learning_rate": 6.74170616113744e-08, + "logits/generated": -0.430379718542099, + "logits/real": -0.595691442489624, + "logps/generated": -1177.614990234375, + "logps/real": -149.47544860839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -78.75337219238281, + "rewards/margins": 78.08720397949219, + "rewards/real": -0.666162371635437, + "step": 4120 + }, + { + "epoch": 2.64, + "learning_rate": 6.623222748815166e-08, + "logits/generated": -0.3575670123100281, + "logits/real": -0.5881283283233643, + "logps/generated": -1064.1312255859375, + "logps/real": -139.80990600585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -68.53370666503906, + "rewards/margins": 67.87602996826172, + "rewards/real": -0.6576740741729736, + "step": 4130 + }, + { + "epoch": 2.65, + "learning_rate": 6.504739336492891e-08, + "logits/generated": -0.37114548683166504, + "logits/real": -0.4665864408016205, + "logps/generated": -1096.2255859375, + "logps/real": -132.2154083251953, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.34225463867188, + "rewards/margins": 70.71492004394531, + "rewards/real": -0.6273313164710999, + "step": 4140 + }, + { + "epoch": 2.66, + "learning_rate": 6.386255924170615e-08, + "logits/generated": -0.36751076579093933, + "logits/real": -0.5819805264472961, + "logps/generated": -1186.3887939453125, + "logps/real": -146.05813598632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -78.0079116821289, + "rewards/margins": 77.3697280883789, + "rewards/real": -0.6381850838661194, + "step": 4150 + }, + { + "epoch": 2.66, + "learning_rate": 6.26777251184834e-08, + "logits/generated": -0.414253294467926, + "logits/real": -0.6853745579719543, + "logps/generated": -1162.65234375, + "logps/real": -144.77853393554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.9441146850586, + "rewards/margins": 74.01307678222656, + "rewards/real": -0.931043267250061, + "step": 4160 + }, + { + "epoch": 2.67, + "learning_rate": 6.149289099526066e-08, + "logits/generated": -0.42148175835609436, + "logits/real": -0.6784361600875854, + "logps/generated": -1106.80078125, + "logps/real": -149.71484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.97175598144531, + "rewards/margins": 70.25888061523438, + "rewards/real": -0.7128777503967285, + "step": 4170 + }, + { + "epoch": 2.67, + "learning_rate": 6.030805687203791e-08, + "logits/generated": -0.43376749753952026, + "logits/real": -0.555932879447937, + "logps/generated": -1215.3369140625, + "logps/real": -154.15284729003906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -78.09486389160156, + "rewards/margins": 77.44525146484375, + "rewards/real": -0.6496086716651917, + "step": 4180 + }, + { + "epoch": 2.68, + "learning_rate": 5.912322274881516e-08, + "logits/generated": -0.4182409346103668, + "logits/real": -0.5983696579933167, + "logps/generated": -1234.3037109375, + "logps/real": -168.16717529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -84.26101684570312, + "rewards/margins": 83.48126983642578, + "rewards/real": -0.7797611951828003, + "step": 4190 + }, + { + "epoch": 2.69, + "learning_rate": 5.793838862559241e-08, + "logits/generated": -0.445539653301239, + "logits/real": -0.670427680015564, + "logps/generated": -1296.377685546875, + "logps/real": -134.7132568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -88.41688537597656, + "rewards/margins": 87.923095703125, + "rewards/real": -0.49379315972328186, + "step": 4200 + }, + { + "epoch": 2.69, + "learning_rate": 5.6753554502369666e-08, + "logits/generated": -0.3767016530036926, + "logits/real": -0.5589041709899902, + "logps/generated": -1065.5745849609375, + "logps/real": -145.93048095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -64.40142059326172, + "rewards/margins": 63.82404708862305, + "rewards/real": -0.577372133731842, + "step": 4210 + }, + { + "epoch": 2.7, + "learning_rate": 5.556872037914691e-08, + "logits/generated": -0.41733822226524353, + "logits/real": -0.6436376571655273, + "logps/generated": -1053.798095703125, + "logps/real": -165.2022247314453, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.1214828491211, + "rewards/margins": 64.43299865722656, + "rewards/real": -0.6884856820106506, + "step": 4220 + }, + { + "epoch": 2.71, + "learning_rate": 5.4383886255924165e-08, + "logits/generated": -0.4003145098686218, + "logits/real": -0.5307371020317078, + "logps/generated": -1084.7730712890625, + "logps/real": -168.169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.28605651855469, + "rewards/margins": 66.21138763427734, + "rewards/real": -1.0746623277664185, + "step": 4230 + }, + { + "epoch": 2.71, + "learning_rate": 5.319905213270142e-08, + "logits/generated": -0.4704248011112213, + "logits/real": -0.6616953611373901, + "logps/generated": -1222.5751953125, + "logps/real": -148.44683837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -79.704345703125, + "rewards/margins": 79.09273529052734, + "rewards/real": -0.6116179823875427, + "step": 4240 + }, + { + "epoch": 2.72, + "learning_rate": 5.201421800947867e-08, + "logits/generated": -0.4243236482143402, + "logits/real": -0.6298291683197021, + "logps/generated": -1416.729736328125, + "logps/real": -142.0489959716797, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -100.81539154052734, + "rewards/margins": 100.10444641113281, + "rewards/real": -0.710952639579773, + "step": 4250 + }, + { + "epoch": 2.73, + "learning_rate": 5.082938388625592e-08, + "logits/generated": -0.4102560579776764, + "logits/real": -0.5691145658493042, + "logps/generated": -1115.2730712890625, + "logps/real": -159.29409790039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -70.8335952758789, + "rewards/margins": 70.12803649902344, + "rewards/real": -0.7055586576461792, + "step": 4260 + }, + { + "epoch": 2.73, + "learning_rate": 4.964454976303317e-08, + "logits/generated": -0.4029023051261902, + "logits/real": -0.6073136329650879, + "logps/generated": -1238.347900390625, + "logps/real": -143.02938842773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -82.9701156616211, + "rewards/margins": 82.27519226074219, + "rewards/real": -0.6949158906936646, + "step": 4270 + }, + { + "epoch": 2.74, + "learning_rate": 4.845971563981042e-08, + "logits/generated": -0.42402610182762146, + "logits/real": -0.6584951877593994, + "logps/generated": -1118.9874267578125, + "logps/real": -145.93138122558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.63047790527344, + "rewards/margins": 72.98558044433594, + "rewards/real": -0.644890546798706, + "step": 4280 + }, + { + "epoch": 2.74, + "learning_rate": 4.7274881516587676e-08, + "logits/generated": -0.3934037685394287, + "logits/real": -0.6283634901046753, + "logps/generated": -1259.203369140625, + "logps/real": -153.5712432861328, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -86.28514099121094, + "rewards/margins": 85.61091613769531, + "rewards/real": -0.6742227673530579, + "step": 4290 + }, + { + "epoch": 2.75, + "learning_rate": 4.609004739336492e-08, + "logits/generated": -0.4452149271965027, + "logits/real": -0.562778115272522, + "logps/generated": -1178.203369140625, + "logps/real": -167.87570190429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.86197662353516, + "rewards/margins": 76.29110717773438, + "rewards/real": -0.5708707571029663, + "step": 4300 + }, + { + "epoch": 2.76, + "learning_rate": 4.4905213270142176e-08, + "logits/generated": -0.46924668550491333, + "logits/real": -0.5640957355499268, + "logps/generated": -1178.0283203125, + "logps/real": -152.62841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.18916320800781, + "rewards/margins": 74.46082305908203, + "rewards/real": -0.7283350229263306, + "step": 4310 + }, + { + "epoch": 2.76, + "learning_rate": 4.372037914691943e-08, + "logits/generated": -0.4569918215274811, + "logits/real": -0.6253639459609985, + "logps/generated": -1141.238525390625, + "logps/real": -159.03335571289062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.83621215820312, + "rewards/margins": 72.23777770996094, + "rewards/real": -0.5984372496604919, + "step": 4320 + }, + { + "epoch": 2.77, + "learning_rate": 4.253554502369668e-08, + "logits/generated": -0.3836295008659363, + "logits/real": -0.5883413553237915, + "logps/generated": -1028.1627197265625, + "logps/real": -152.56336975097656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.914405822753906, + "rewards/margins": 62.3019905090332, + "rewards/real": -0.6124156713485718, + "step": 4330 + }, + { + "epoch": 2.78, + "learning_rate": 4.135071090047393e-08, + "logits/generated": -0.43586069345474243, + "logits/real": -0.584862232208252, + "logps/generated": -1256.212646484375, + "logps/real": -182.5783233642578, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -84.49156951904297, + "rewards/margins": 83.60160064697266, + "rewards/real": -0.8899722099304199, + "step": 4340 + }, + { + "epoch": 2.78, + "learning_rate": 4.016587677725118e-08, + "logits/generated": -0.42249807715415955, + "logits/real": -0.6765289902687073, + "logps/generated": -1098.554443359375, + "logps/real": -149.42665100097656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.22462463378906, + "rewards/margins": 68.48421478271484, + "rewards/real": -0.740403950214386, + "step": 4350 + }, + { + "epoch": 2.79, + "learning_rate": 3.8981042654028434e-08, + "logits/generated": -0.3722071051597595, + "logits/real": -0.6433338522911072, + "logps/generated": -1152.187255859375, + "logps/real": -135.0558624267578, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.83736419677734, + "rewards/margins": 71.99942016601562, + "rewards/real": -0.8379424810409546, + "step": 4360 + }, + { + "epoch": 2.8, + "learning_rate": 3.779620853080569e-08, + "logits/generated": -0.4465080797672272, + "logits/real": -0.6853441596031189, + "logps/generated": -1219.987548828125, + "logps/real": -149.74484252929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -81.68904876708984, + "rewards/margins": 81.03762817382812, + "rewards/real": -0.6514285802841187, + "step": 4370 + }, + { + "epoch": 2.8, + "learning_rate": 3.661137440758294e-08, + "logits/generated": -0.39111563563346863, + "logits/real": -0.62447589635849, + "logps/generated": -1110.0286865234375, + "logps/real": -130.0288543701172, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.36956787109375, + "rewards/margins": 72.62205505371094, + "rewards/real": -0.7475109696388245, + "step": 4380 + }, + { + "epoch": 2.81, + "learning_rate": 3.5426540284360186e-08, + "logits/generated": -0.3793131709098816, + "logits/real": -0.6151835322380066, + "logps/generated": -1089.3216552734375, + "logps/real": -138.77415466308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -67.62622833251953, + "rewards/margins": 67.07869720458984, + "rewards/real": -0.5475287437438965, + "step": 4390 + }, + { + "epoch": 2.82, + "learning_rate": 3.424170616113744e-08, + "logits/generated": -0.41481703519821167, + "logits/real": -0.6037416458129883, + "logps/generated": -1118.58349609375, + "logps/real": -156.24639892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.81546783447266, + "rewards/margins": 71.1466293334961, + "rewards/real": -0.6688372492790222, + "step": 4400 + }, + { + "epoch": 2.82, + "learning_rate": 3.305687203791469e-08, + "logits/generated": -0.38786306977272034, + "logits/real": -0.6748972535133362, + "logps/generated": -1132.7213134765625, + "logps/real": -165.26429748535156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.00263977050781, + "rewards/margins": 73.53857421875, + "rewards/real": -0.4640537202358246, + "step": 4410 + }, + { + "epoch": 2.83, + "learning_rate": 3.1872037914691945e-08, + "logits/generated": -0.3895108699798584, + "logits/real": -0.5759503245353699, + "logps/generated": -1006.7896728515625, + "logps/real": -157.9845733642578, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -62.23859786987305, + "rewards/margins": 61.38972091674805, + "rewards/real": -0.8488828539848328, + "step": 4420 + }, + { + "epoch": 2.83, + "learning_rate": 3.068720379146919e-08, + "logits/generated": -0.3578011691570282, + "logits/real": -0.5436447262763977, + "logps/generated": -1215.705078125, + "logps/real": -147.42477416992188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -80.29849243164062, + "rewards/margins": 79.59217071533203, + "rewards/real": -0.7063143253326416, + "step": 4430 + }, + { + "epoch": 2.84, + "learning_rate": 2.9502369668246444e-08, + "logits/generated": -0.41877445578575134, + "logits/real": -0.5616232752799988, + "logps/generated": -1106.929931640625, + "logps/real": -175.37808227539062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.43008422851562, + "rewards/margins": 70.78630065917969, + "rewards/real": -0.6437833905220032, + "step": 4440 + }, + { + "epoch": 2.85, + "learning_rate": 2.8317535545023697e-08, + "logits/generated": -0.42272821068763733, + "logits/real": -0.6309406161308289, + "logps/generated": -1138.3070068359375, + "logps/real": -144.12118530273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.61137390136719, + "rewards/margins": 72.8843994140625, + "rewards/real": -0.7269810438156128, + "step": 4450 + }, + { + "epoch": 2.85, + "learning_rate": 2.7132701421800947e-08, + "logits/generated": -0.37885454297065735, + "logits/real": -0.6365025043487549, + "logps/generated": -1212.714111328125, + "logps/real": -144.41900634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -78.62388610839844, + "rewards/margins": 77.66529083251953, + "rewards/real": -0.9586065411567688, + "step": 4460 + }, + { + "epoch": 2.86, + "learning_rate": 2.59478672985782e-08, + "logits/generated": -0.4420732855796814, + "logits/real": -0.5323609113693237, + "logps/generated": -1076.71533203125, + "logps/real": -181.2960968017578, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.17323303222656, + "rewards/margins": 64.45045471191406, + "rewards/real": -0.7227771878242493, + "step": 4470 + }, + { + "epoch": 2.87, + "learning_rate": 2.476303317535545e-08, + "logits/generated": -0.38568204641342163, + "logits/real": -0.5752898454666138, + "logps/generated": -1061.8555908203125, + "logps/real": -170.11355590820312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -66.07365417480469, + "rewards/margins": 65.37210845947266, + "rewards/real": -0.7015471458435059, + "step": 4480 + }, + { + "epoch": 2.87, + "learning_rate": 2.3578199052132702e-08, + "logits/generated": -0.4010697901248932, + "logits/real": -0.5733628273010254, + "logps/generated": -1116.184326171875, + "logps/real": -161.65103149414062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -71.5921630859375, + "rewards/margins": 71.0265121459961, + "rewards/real": -0.5656577944755554, + "step": 4490 + }, + { + "epoch": 2.88, + "learning_rate": 2.239336492890995e-08, + "logits/generated": -0.4115443229675293, + "logits/real": -0.5424914360046387, + "logps/generated": -1180.7628173828125, + "logps/real": -185.47634887695312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.3253402709961, + "rewards/margins": 76.40950012207031, + "rewards/real": -0.9158375859260559, + "step": 4500 + }, + { + "epoch": 2.89, + "learning_rate": 2.1208530805687202e-08, + "logits/generated": -0.42339619994163513, + "logits/real": -0.6375981569290161, + "logps/generated": -1170.9703369140625, + "logps/real": -131.23190307617188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.03814697265625, + "rewards/margins": 76.52554321289062, + "rewards/real": -0.5126058459281921, + "step": 4510 + }, + { + "epoch": 2.89, + "learning_rate": 2.002369668246445e-08, + "logits/generated": -0.42008423805236816, + "logits/real": -0.6603757739067078, + "logps/generated": -1106.7174072265625, + "logps/real": -132.1379852294922, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.35577392578125, + "rewards/margins": 68.54685974121094, + "rewards/real": -0.8089267611503601, + "step": 4520 + }, + { + "epoch": 2.9, + "learning_rate": 1.8838862559241704e-08, + "logits/generated": -0.4159209132194519, + "logits/real": -0.6070636510848999, + "logps/generated": -1108.8184814453125, + "logps/real": -174.39251708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.12606811523438, + "rewards/margins": 68.53681945800781, + "rewards/real": -0.5892479419708252, + "step": 4530 + }, + { + "epoch": 2.9, + "learning_rate": 1.7654028436018954e-08, + "logits/generated": -0.41173315048217773, + "logits/real": -0.48643770813941956, + "logps/generated": -1070.396728515625, + "logps/real": -191.44134521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -65.99903869628906, + "rewards/margins": 65.2906265258789, + "rewards/real": -0.7084180116653442, + "step": 4540 + }, + { + "epoch": 2.91, + "learning_rate": 1.6469194312796207e-08, + "logits/generated": -0.3698303997516632, + "logits/real": -0.6320183277130127, + "logps/generated": -1076.8929443359375, + "logps/real": -139.63314819335938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -66.975830078125, + "rewards/margins": 66.27364349365234, + "rewards/real": -0.7021877765655518, + "step": 4550 + }, + { + "epoch": 2.92, + "learning_rate": 1.528436018957346e-08, + "logits/generated": -0.3886292278766632, + "logits/real": -0.615139365196228, + "logps/generated": -1140.2864990234375, + "logps/real": -145.85720825195312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.76911163330078, + "rewards/margins": 74.237060546875, + "rewards/real": -0.5320545434951782, + "step": 4560 + }, + { + "epoch": 2.92, + "learning_rate": 1.409952606635071e-08, + "logits/generated": -0.4748724400997162, + "logits/real": -0.6143825650215149, + "logps/generated": -1171.03076171875, + "logps/real": -139.5336151123047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -72.67819213867188, + "rewards/margins": 72.13270568847656, + "rewards/real": -0.5454872846603394, + "step": 4570 + }, + { + "epoch": 2.93, + "learning_rate": 1.2914691943127961e-08, + "logits/generated": -0.39133062958717346, + "logits/real": -0.6907501816749573, + "logps/generated": -1139.4451904296875, + "logps/real": -122.94720458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -73.89988708496094, + "rewards/margins": 73.2684097290039, + "rewards/real": -0.6314736604690552, + "step": 4580 + }, + { + "epoch": 2.94, + "learning_rate": 1.1729857819905212e-08, + "logits/generated": -0.45327988266944885, + "logits/real": -0.6570634245872498, + "logps/generated": -1196.7115478515625, + "logps/real": -150.64051818847656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.87831115722656, + "rewards/margins": 77.22245788574219, + "rewards/real": -0.6558529138565063, + "step": 4590 + }, + { + "epoch": 2.94, + "learning_rate": 1.0545023696682464e-08, + "logits/generated": -0.43308648467063904, + "logits/real": -0.6817704439163208, + "logps/generated": -1163.740966796875, + "logps/real": -139.56373596191406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -76.80686950683594, + "rewards/margins": 76.22025299072266, + "rewards/real": -0.5866076350212097, + "step": 4600 + }, + { + "epoch": 2.95, + "learning_rate": 9.360189573459715e-09, + "logits/generated": -0.38277140259742737, + "logits/real": -0.6559278964996338, + "logps/generated": -1152.90625, + "logps/real": -166.77056884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.07879638671875, + "rewards/margins": 73.38710021972656, + "rewards/real": -0.6916946172714233, + "step": 4610 + }, + { + "epoch": 2.96, + "learning_rate": 8.175355450236966e-09, + "logits/generated": -0.44269418716430664, + "logits/real": -0.6459895968437195, + "logps/generated": -1183.922607421875, + "logps/real": -158.46505737304688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -77.86439514160156, + "rewards/margins": 77.26386260986328, + "rewards/real": -0.6005492806434631, + "step": 4620 + }, + { + "epoch": 2.96, + "learning_rate": 6.990521327014218e-09, + "logits/generated": -0.42995685338974, + "logits/real": -0.6797999739646912, + "logps/generated": -1169.283447265625, + "logps/real": -131.04364013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -75.65510559082031, + "rewards/margins": 75.0411376953125, + "rewards/real": -0.6139676570892334, + "step": 4630 + }, + { + "epoch": 2.97, + "learning_rate": 5.805687203791469e-09, + "logits/generated": -0.4121777415275574, + "logits/real": -0.6116447448730469, + "logps/generated": -1070.22314453125, + "logps/real": -140.3560333251953, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -66.28459167480469, + "rewards/margins": 65.62239074707031, + "rewards/real": -0.6622053384780884, + "step": 4640 + }, + { + "epoch": 2.98, + "learning_rate": 4.62085308056872e-09, + "logits/generated": -0.4175810217857361, + "logits/real": -0.5604298114776611, + "logps/generated": -1161.98583984375, + "logps/real": -162.33438110351562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -74.65108489990234, + "rewards/margins": 74.18379211425781, + "rewards/real": -0.4672994017601013, + "step": 4650 + }, + { + "epoch": 2.98, + "learning_rate": 3.4360189573459714e-09, + "logits/generated": -0.3966117799282074, + "logits/real": -0.5953450202941895, + "logps/generated": -1079.524169921875, + "logps/real": -167.3221435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -69.10150909423828, + "rewards/margins": 68.49336242675781, + "rewards/real": -0.6081460118293762, + "step": 4660 + }, + { + "epoch": 2.99, + "learning_rate": 2.2511848341232227e-09, + "logits/generated": -0.4167659282684326, + "logits/real": -0.6169396042823792, + "logps/generated": -1015.7071533203125, + "logps/real": -176.66506958007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -61.38505935668945, + "rewards/margins": 60.835716247558594, + "rewards/real": -0.5493378043174744, + "step": 4670 + }, + { + "epoch": 2.99, + "learning_rate": 1.0663507109004738e-09, + "logits/generated": -0.3958420157432556, + "logits/real": -0.5675554275512695, + "logps/generated": -1055.50341796875, + "logps/real": -149.56039428710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -66.84716796875, + "rewards/margins": 66.41246032714844, + "rewards/real": -0.4347153604030609, + "step": 4680 + }, + { + "epoch": 3.0, + "step": 4689, + "total_flos": 0.0, + "train_loss": 0.010306433322205334, + "train_runtime": 33629.5111, + "train_samples_per_second": 4.46, + "train_steps_per_second": 0.139 + } + ], + "logging_steps": 10, + "max_steps": 4689, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}