{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 4689, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.066098081023454e-09, "logits/generated": -0.6986645460128784, "logits/real": -0.9474660754203796, "logps/generated": -378.9501953125, "logps/real": -127.2445068359375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.0660980810234541e-08, "logits/generated": -0.7292745113372803, "logits/real": -0.9080196022987366, "logps/generated": -411.4975280761719, "logps/real": -136.8819122314453, "loss": 0.6994, "rewards/accuracies": 0.4305555522441864, "rewards/generated": -0.00470572616904974, "rewards/margins": 0.0034854437690228224, "rewards/real": -0.0012202821671962738, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.1321961620469082e-08, "logits/generated": -0.7172996997833252, "logits/real": -0.8902201652526855, "logps/generated": -425.13238525390625, "logps/real": -146.6293182373047, "loss": 0.6822, "rewards/accuracies": 0.5625, "rewards/generated": -0.04359797015786171, "rewards/margins": 0.047350525856018066, "rewards/real": 0.003752560820430517, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.1982942430703625e-08, "logits/generated": -0.7285000681877136, "logits/real": -0.9076566696166992, "logps/generated": -390.41241455078125, "logps/real": -124.2341537475586, "loss": 0.6261, "rewards/accuracies": 0.875, "rewards/generated": -0.1323629468679428, "rewards/margins": 0.12934455275535583, "rewards/real": -0.003018400864675641, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.2643923240938164e-08, "logits/generated": -0.7392301559448242, "logits/real": -0.8854039311408997, "logps/generated": -411.90313720703125, "logps/real": -156.40731811523438, "loss": 0.4991, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.507174551486969, "rewards/margins": 0.4850993752479553, "rewards/real": -0.022075189277529716, "step": 40 }, { "epoch": 0.03, "learning_rate": 5.3304904051172704e-08, "logits/generated": -0.7073003053665161, "logits/real": -0.8917710185050964, "logps/generated": -438.045654296875, "logps/real": -150.26516723632812, "loss": 0.3997, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.9068630337715149, "rewards/margins": 0.8774474263191223, "rewards/real": -0.029415583238005638, "step": 50 }, { "epoch": 0.04, "learning_rate": 6.396588486140725e-08, "logits/generated": -0.683810830116272, "logits/real": -0.8957662582397461, "logps/generated": -449.07598876953125, "logps/real": -147.19796752929688, "loss": 0.222, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.9475853443145752, "rewards/margins": 1.8720191717147827, "rewards/real": -0.07556610554456711, "step": 60 }, { "epoch": 0.04, "learning_rate": 7.462686567164178e-08, "logits/generated": -0.6989277601242065, "logits/real": -0.826370894908905, "logps/generated": -423.2393493652344, "logps/real": -161.0751190185547, "loss": 0.1887, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.234829902648926, "rewards/margins": 2.123378038406372, "rewards/real": -0.1114521399140358, "step": 70 }, { "epoch": 0.05, "learning_rate": 8.528784648187633e-08, "logits/generated": -0.7299633622169495, "logits/real": -0.8775332570075989, "logps/generated": -423.95709228515625, "logps/real": -147.74581909179688, "loss": 0.1788, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.318953037261963, "rewards/margins": 2.218860149383545, "rewards/real": -0.10009302943944931, "step": 80 }, { "epoch": 0.06, "learning_rate": 9.594882729211087e-08, "logits/generated": -0.7246233224868774, "logits/real": -0.9104539752006531, "logps/generated": -413.73638916015625, "logps/real": -143.83090209960938, "loss": 0.1662, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.487647533416748, "rewards/margins": 2.347053289413452, "rewards/real": -0.1405942738056183, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.0660980810234541e-07, "logits/generated": -0.7521112561225891, "logits/real": -0.8818603754043579, "logps/generated": -452.70709228515625, "logps/real": -162.36923217773438, "loss": 0.1114, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.276683807373047, "rewards/margins": 3.1224722862243652, "rewards/real": -0.15421171486377716, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.1727078891257995e-07, "logits/generated": -0.7396367788314819, "logits/real": -0.8340644836425781, "logps/generated": -441.98297119140625, "logps/real": -156.94149780273438, "loss": 0.1072, "rewards/accuracies": 1.0, "rewards/generated": -3.906806230545044, "rewards/margins": 3.7491652965545654, "rewards/real": -0.15764120221138, "step": 110 }, { "epoch": 0.08, "learning_rate": 1.279317697228145e-07, "logits/generated": -0.7582974433898926, "logits/real": -0.9137656092643738, "logps/generated": -457.762451171875, "logps/real": -158.60690307617188, "loss": 0.0746, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.984735012054443, "rewards/margins": 4.785731315612793, "rewards/real": -0.19900405406951904, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.3859275053304903e-07, "logits/generated": -0.7004902958869934, "logits/real": -0.8594983220100403, "logps/generated": -452.61187744140625, "logps/real": -145.72874450683594, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/generated": -5.79620361328125, "rewards/margins": 5.550940036773682, "rewards/real": -0.24526312947273254, "step": 130 }, { "epoch": 0.09, "learning_rate": 1.4925373134328355e-07, "logits/generated": -0.7274501919746399, "logits/real": -0.9405637979507446, "logps/generated": -462.9717712402344, "logps/real": -147.3827667236328, "loss": 0.0653, "rewards/accuracies": 0.987500011920929, "rewards/generated": -6.3193254470825195, "rewards/margins": 6.078797340393066, "rewards/real": -0.2405281811952591, "step": 140 }, { "epoch": 0.1, "learning_rate": 1.5991471215351813e-07, "logits/generated": -0.7242128849029541, "logits/real": -0.9140293002128601, "logps/generated": -474.15216064453125, "logps/real": -157.3037567138672, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/generated": -6.953827857971191, "rewards/margins": 6.571684837341309, "rewards/real": -0.38214248418807983, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.7057569296375266e-07, "logits/generated": -0.7147258520126343, "logits/real": -0.8815923929214478, "logps/generated": -457.26519775390625, "logps/real": -142.1988067626953, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/generated": -6.94360876083374, "rewards/margins": 6.643794059753418, "rewards/real": -0.29981470108032227, "step": 160 }, { "epoch": 0.11, "learning_rate": 1.8123667377398718e-07, "logits/generated": -0.7313283085823059, "logits/real": -0.8958312273025513, "logps/generated": -466.23126220703125, "logps/real": -156.481201171875, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/generated": -7.367417812347412, "rewards/margins": 7.1352057456970215, "rewards/real": -0.23221150040626526, "step": 170 }, { "epoch": 0.12, "learning_rate": 1.9189765458422174e-07, "logits/generated": -0.7426391839981079, "logits/real": -0.8894122838973999, "logps/generated": -474.2499084472656, "logps/real": -148.93968200683594, "loss": 0.045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.244542121887207, "rewards/margins": 6.9434709548950195, "rewards/real": -0.30106985569000244, "step": 180 }, { "epoch": 0.12, "learning_rate": 2.025586353944563e-07, "logits/generated": -0.7155178785324097, "logits/real": -0.8063043355941772, "logps/generated": -488.8700256347656, "logps/real": -178.33253479003906, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/generated": -7.651026248931885, "rewards/margins": 7.362033843994141, "rewards/real": -0.28899192810058594, "step": 190 }, { "epoch": 0.13, "learning_rate": 2.1321961620469082e-07, "logits/generated": -0.7116974592208862, "logits/real": -0.8972026705741882, "logps/generated": -475.73089599609375, "logps/real": -143.96710205078125, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/generated": -8.454703330993652, "rewards/margins": 8.173591613769531, "rewards/real": -0.28111228346824646, "step": 200 }, { "epoch": 0.13, "learning_rate": 2.2388059701492537e-07, "logits/generated": -0.6575301885604858, "logits/real": -0.8355759382247925, "logps/generated": -517.0919799804688, "logps/real": -158.15267944335938, "loss": 0.0261, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.714963912963867, "rewards/margins": 8.384611129760742, "rewards/real": -0.3303532600402832, "step": 210 }, { "epoch": 0.14, "learning_rate": 2.345415778251599e-07, "logits/generated": -0.6825748085975647, "logits/real": -0.9428423643112183, "logps/generated": -481.72381591796875, "logps/real": -139.2545166015625, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/generated": -7.92175817489624, "rewards/margins": 7.7295966148376465, "rewards/real": -0.19216081500053406, "step": 220 }, { "epoch": 0.15, "learning_rate": 2.452025586353944e-07, "logits/generated": -0.7176781892776489, "logits/real": -0.8804994821548462, "logps/generated": -505.34423828125, "logps/real": -167.8238983154297, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/generated": -9.55711555480957, "rewards/margins": 9.215084075927734, "rewards/real": -0.34203046560287476, "step": 230 }, { "epoch": 0.15, "learning_rate": 2.55863539445629e-07, "logits/generated": -0.7456918358802795, "logits/real": -0.9094620943069458, "logps/generated": -521.493408203125, "logps/real": -168.6183319091797, "loss": 0.0309, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.619193077087402, "rewards/margins": 9.36630916595459, "rewards/real": -0.2528838515281677, "step": 240 }, { "epoch": 0.16, "learning_rate": 2.665245202558635e-07, "logits/generated": -0.7071075439453125, "logits/real": -0.9580531120300293, "logps/generated": -492.2247009277344, "logps/real": -122.14371490478516, "loss": 0.0247, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.554067611694336, "rewards/margins": 9.357695579528809, "rewards/real": -0.19637097418308258, "step": 250 }, { "epoch": 0.17, "learning_rate": 2.7718550106609805e-07, "logits/generated": -0.6865926384925842, "logits/real": -0.9246328473091125, "logps/generated": -499.01458740234375, "logps/real": -112.85029602050781, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/generated": -9.010147094726562, "rewards/margins": 8.914213180541992, "rewards/real": -0.0959334522485733, "step": 260 }, { "epoch": 0.17, "learning_rate": 2.878464818763326e-07, "logits/generated": -0.7495471239089966, "logits/real": -0.9227398037910461, "logps/generated": -492.08404541015625, "logps/real": -152.34420776367188, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/generated": -9.229729652404785, "rewards/margins": 9.07396125793457, "rewards/real": -0.15576975047588348, "step": 270 }, { "epoch": 0.18, "learning_rate": 2.985074626865671e-07, "logits/generated": -0.7224324941635132, "logits/real": -0.9130092859268188, "logps/generated": -490.20587158203125, "logps/real": -147.02711486816406, "loss": 0.0299, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.046536445617676, "rewards/margins": 8.867794036865234, "rewards/real": -0.17874157428741455, "step": 280 }, { "epoch": 0.19, "learning_rate": 3.0916844349680174e-07, "logits/generated": -0.6551756262779236, "logits/real": -0.856887698173523, "logps/generated": -494.3564453125, "logps/real": -127.67320251464844, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/generated": -9.807455062866211, "rewards/margins": 9.784029960632324, "rewards/real": -0.02342619001865387, "step": 290 }, { "epoch": 0.19, "learning_rate": 3.1982942430703626e-07, "logits/generated": -0.7393316030502319, "logits/real": -0.8498824238777161, "logps/generated": -507.4498596191406, "logps/real": -155.37051391601562, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/generated": -10.729459762573242, "rewards/margins": 10.669529914855957, "rewards/real": -0.05992986634373665, "step": 300 }, { "epoch": 0.2, "learning_rate": 3.304904051172708e-07, "logits/generated": -0.6794149875640869, "logits/real": -0.8570221066474915, "logps/generated": -516.3793334960938, "logps/real": -147.1600799560547, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/generated": -11.062250137329102, "rewards/margins": 10.987607955932617, "rewards/real": -0.07464051991701126, "step": 310 }, { "epoch": 0.2, "learning_rate": 3.411513859275053e-07, "logits/generated": -0.7035683393478394, "logits/real": -0.8863167762756348, "logps/generated": -511.3915100097656, "logps/real": -155.48513793945312, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -10.503643989562988, "rewards/margins": 10.356426239013672, "rewards/real": -0.1472179889678955, "step": 320 }, { "epoch": 0.21, "learning_rate": 3.5181236673773984e-07, "logits/generated": -0.6560925245285034, "logits/real": -0.8542153239250183, "logps/generated": -549.3557739257812, "logps/real": -134.815185546875, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -13.663198471069336, "rewards/margins": 13.555437088012695, "rewards/real": -0.10776337236166, "step": 330 }, { "epoch": 0.22, "learning_rate": 3.6247334754797437e-07, "logits/generated": -0.6811632513999939, "logits/real": -0.9368340373039246, "logps/generated": -537.3572387695312, "logps/real": -134.58053588867188, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/generated": -13.095390319824219, "rewards/margins": 12.888254165649414, "rewards/real": -0.20713606476783752, "step": 340 }, { "epoch": 0.22, "learning_rate": 3.7313432835820895e-07, "logits/generated": -0.7272433042526245, "logits/real": -0.9455550909042358, "logps/generated": -553.9634399414062, "logps/real": -131.4666748046875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -13.890289306640625, "rewards/margins": 13.677050590515137, "rewards/real": -0.21323621273040771, "step": 350 }, { "epoch": 0.23, "learning_rate": 3.8379530916844347e-07, "logits/generated": -0.7156924605369568, "logits/real": -0.8684523701667786, "logps/generated": -536.7255859375, "logps/real": -173.733154296875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -13.108613967895508, "rewards/margins": 12.777644157409668, "rewards/real": -0.33096957206726074, "step": 360 }, { "epoch": 0.24, "learning_rate": 3.9445628997867805e-07, "logits/generated": -0.6920727491378784, "logits/real": -0.851031482219696, "logps/generated": -580.4476318359375, "logps/real": -153.07241821289062, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -14.45750617980957, "rewards/margins": 14.292282104492188, "rewards/real": -0.16522422432899475, "step": 370 }, { "epoch": 0.24, "learning_rate": 4.051172707889126e-07, "logits/generated": -0.7242365479469299, "logits/real": -0.9248291254043579, "logps/generated": -566.2769775390625, "logps/real": -136.8095245361328, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -14.296048164367676, "rewards/margins": 14.1116361618042, "rewards/real": -0.18441154062747955, "step": 380 }, { "epoch": 0.25, "learning_rate": 4.157782515991471e-07, "logits/generated": -0.7522596120834351, "logits/real": -0.9050644636154175, "logps/generated": -568.7861328125, "logps/real": -143.46664428710938, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/generated": -16.536224365234375, "rewards/margins": 16.37632942199707, "rewards/real": -0.15989510715007782, "step": 390 }, { "epoch": 0.26, "learning_rate": 4.2643923240938163e-07, "logits/generated": -0.7549287676811218, "logits/real": -0.944291889667511, "logps/generated": -558.4595947265625, "logps/real": -138.44850158691406, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -16.336772918701172, "rewards/margins": 16.19559669494629, "rewards/real": -0.14117594063282013, "step": 400 }, { "epoch": 0.26, "learning_rate": 4.371002132196162e-07, "logits/generated": -0.7194818258285522, "logits/real": -0.9151653051376343, "logps/generated": -603.3551025390625, "logps/real": -128.14102172851562, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -17.891334533691406, "rewards/margins": 17.594852447509766, "rewards/real": -0.2964830994606018, "step": 410 }, { "epoch": 0.27, "learning_rate": 4.4776119402985074e-07, "logits/generated": -0.7237090468406677, "logits/real": -0.876343846321106, "logps/generated": -554.85302734375, "logps/real": -144.67776489257812, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -15.776510238647461, "rewards/margins": 15.578184127807617, "rewards/real": -0.19832463562488556, "step": 420 }, { "epoch": 0.28, "learning_rate": 4.5842217484008526e-07, "logits/generated": -0.6880273818969727, "logits/real": -0.8874770402908325, "logps/generated": -574.5953369140625, "logps/real": -144.68075561523438, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -16.195022583007812, "rewards/margins": 16.028963088989258, "rewards/real": -0.16606178879737854, "step": 430 }, { "epoch": 0.28, "learning_rate": 4.690831556503198e-07, "logits/generated": -0.6841549873352051, "logits/real": -0.8916375041007996, "logps/generated": -601.9527587890625, "logps/real": -154.18507385253906, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -17.76523208618164, "rewards/margins": 17.5986270904541, "rewards/real": -0.16660475730895996, "step": 440 }, { "epoch": 0.29, "learning_rate": 4.797441364605543e-07, "logits/generated": -0.6988117694854736, "logits/real": -0.8081305623054504, "logps/generated": -610.0335693359375, "logps/real": -151.32000732421875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -19.002681732177734, "rewards/margins": 18.782581329345703, "rewards/real": -0.22010159492492676, "step": 450 }, { "epoch": 0.29, "learning_rate": 4.904051172707888e-07, "logits/generated": -0.696639895439148, "logits/real": -0.9278604388237, "logps/generated": -585.9586791992188, "logps/real": -161.8017120361328, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -17.64035415649414, "rewards/margins": 17.357894897460938, "rewards/real": -0.282459557056427, "step": 460 }, { "epoch": 0.3, "learning_rate": 4.998815165876776e-07, "logits/generated": -0.6872554421424866, "logits/real": -0.9127834439277649, "logps/generated": -568.6585693359375, "logps/real": -129.33038330078125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -17.379060745239258, "rewards/margins": 17.275171279907227, "rewards/real": -0.10388918966054916, "step": 470 }, { "epoch": 0.31, "learning_rate": 4.98696682464455e-07, "logits/generated": -0.699679970741272, "logits/real": -0.8975842595100403, "logps/generated": -584.9615478515625, "logps/real": -152.40818786621094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -18.11277961730957, "rewards/margins": 17.817615509033203, "rewards/real": -0.2951619029045105, "step": 480 }, { "epoch": 0.31, "learning_rate": 4.975118483412322e-07, "logits/generated": -0.7286016941070557, "logits/real": -0.8225492238998413, "logps/generated": -618.4642333984375, "logps/real": -168.58460998535156, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -19.85270118713379, "rewards/margins": 19.50424575805664, "rewards/real": -0.34845709800720215, "step": 490 }, { "epoch": 0.32, "learning_rate": 4.963270142180094e-07, "logits/generated": -0.7258303761482239, "logits/real": -0.9152861833572388, "logps/generated": -578.2322387695312, "logps/real": -137.53619384765625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -19.372339248657227, "rewards/margins": 19.179141998291016, "rewards/real": -0.19319558143615723, "step": 500 }, { "epoch": 0.33, "learning_rate": 4.951421800947867e-07, "logits/generated": -0.7013474702835083, "logits/real": -0.8657256960868835, "logps/generated": -624.0083618164062, "logps/real": -149.85691833496094, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -22.289051055908203, "rewards/margins": 21.97234344482422, "rewards/real": -0.3167068660259247, "step": 510 }, { "epoch": 0.33, "learning_rate": 4.93957345971564e-07, "logits/generated": -0.7635418772697449, "logits/real": -0.868754506111145, "logps/generated": -628.0731201171875, "logps/real": -171.21641540527344, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -22.14756965637207, "rewards/margins": 21.83392333984375, "rewards/real": -0.31364530324935913, "step": 520 }, { "epoch": 0.34, "learning_rate": 4.927725118483413e-07, "logits/generated": -0.7472074031829834, "logits/real": -0.9306868314743042, "logps/generated": -664.8667602539062, "logps/real": -160.69815063476562, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -25.677719116210938, "rewards/margins": 25.49850082397461, "rewards/real": -0.17921803891658783, "step": 530 }, { "epoch": 0.35, "learning_rate": 4.915876777251184e-07, "logits/generated": -0.6238476037979126, "logits/real": -0.78472900390625, "logps/generated": -606.1143798828125, "logps/real": -158.46510314941406, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -20.29648208618164, "rewards/margins": 19.908735275268555, "rewards/real": -0.3877467215061188, "step": 540 }, { "epoch": 0.35, "learning_rate": 4.904028436018957e-07, "logits/generated": -0.6451541185379028, "logits/real": -0.8735024333000183, "logps/generated": -645.0818481445312, "logps/real": -131.34632873535156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -22.540191650390625, "rewards/margins": 22.381816864013672, "rewards/real": -0.15837618708610535, "step": 550 }, { "epoch": 0.36, "learning_rate": 4.892180094786729e-07, "logits/generated": -0.6812300682067871, "logits/real": -0.8363407850265503, "logps/generated": -600.5889282226562, "logps/real": -161.74234008789062, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -21.389225006103516, "rewards/margins": 21.255290985107422, "rewards/real": -0.133933424949646, "step": 560 }, { "epoch": 0.36, "learning_rate": 4.880331753554502e-07, "logits/generated": -0.6616766452789307, "logits/real": -0.8058059811592102, "logps/generated": -581.693359375, "logps/real": -169.41537475585938, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -19.568174362182617, "rewards/margins": 19.344226837158203, "rewards/real": -0.22394871711730957, "step": 570 }, { "epoch": 0.37, "learning_rate": 4.868483412322275e-07, "logits/generated": -0.6738962531089783, "logits/real": -0.8422471880912781, "logps/generated": -610.7338256835938, "logps/real": -153.44923400878906, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -20.05523109436035, "rewards/margins": 19.948062896728516, "rewards/real": -0.1071687787771225, "step": 580 }, { "epoch": 0.38, "learning_rate": 4.856635071090047e-07, "logits/generated": -0.653414785861969, "logits/real": -0.9212865829467773, "logps/generated": -636.19677734375, "logps/real": -136.12069702148438, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -23.247209548950195, "rewards/margins": 23.25400161743164, "rewards/real": 0.006791981868445873, "step": 590 }, { "epoch": 0.38, "learning_rate": 4.84478672985782e-07, "logits/generated": -0.652206301689148, "logits/real": -0.8476254343986511, "logps/generated": -619.7491455078125, "logps/real": -142.06788635253906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -21.76266860961914, "rewards/margins": 21.641902923583984, "rewards/real": -0.12076608836650848, "step": 600 }, { "epoch": 0.39, "learning_rate": 4.832938388625591e-07, "logits/generated": -0.6429646015167236, "logits/real": -0.8978961706161499, "logps/generated": -618.6793823242188, "logps/real": -125.8365707397461, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -22.18993377685547, "rewards/margins": 22.008413314819336, "rewards/real": -0.18152059614658356, "step": 610 }, { "epoch": 0.4, "learning_rate": 4.821090047393365e-07, "logits/generated": -0.6130845546722412, "logits/real": -0.8363273739814758, "logps/generated": -597.8096923828125, "logps/real": -160.01922607421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -20.191875457763672, "rewards/margins": 19.777849197387695, "rewards/real": -0.4140281081199646, "step": 620 }, { "epoch": 0.4, "learning_rate": 4.809241706161137e-07, "logits/generated": -0.618815541267395, "logits/real": -0.7665129899978638, "logps/generated": -627.1131591796875, "logps/real": -141.24853515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -21.848682403564453, "rewards/margins": 21.604549407958984, "rewards/real": -0.24413225054740906, "step": 630 }, { "epoch": 0.41, "learning_rate": 4.79739336492891e-07, "logits/generated": -0.6469287872314453, "logits/real": -0.7974787354469299, "logps/generated": -646.8034057617188, "logps/real": -140.12033081054688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -24.727245330810547, "rewards/margins": 24.623071670532227, "rewards/real": -0.10417119413614273, "step": 640 }, { "epoch": 0.42, "learning_rate": 4.785545023696682e-07, "logits/generated": -0.6693117618560791, "logits/real": -0.8058202862739563, "logps/generated": -636.232421875, "logps/real": -162.97914123535156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -24.655866622924805, "rewards/margins": 24.45601463317871, "rewards/real": -0.19985152781009674, "step": 650 }, { "epoch": 0.42, "learning_rate": 4.773696682464455e-07, "logits/generated": -0.6164982914924622, "logits/real": -0.7986790537834167, "logps/generated": -607.1170654296875, "logps/real": -139.31671142578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -20.806074142456055, "rewards/margins": 20.69213104248047, "rewards/real": -0.11394244432449341, "step": 660 }, { "epoch": 0.43, "learning_rate": 4.7618483412322273e-07, "logits/generated": -0.6816304922103882, "logits/real": -0.7648627161979675, "logps/generated": -647.4364013671875, "logps/real": -150.38284301757812, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -25.9957275390625, "rewards/margins": 25.87929344177246, "rewards/real": -0.11643538624048233, "step": 670 }, { "epoch": 0.44, "learning_rate": 4.7499999999999995e-07, "logits/generated": -0.6420483589172363, "logits/real": -0.8686118125915527, "logps/generated": -686.556640625, "logps/real": -170.68507385253906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -26.79473876953125, "rewards/margins": 26.400888442993164, "rewards/real": -0.3938508927822113, "step": 680 }, { "epoch": 0.44, "learning_rate": 4.738151658767772e-07, "logits/generated": -0.6229578852653503, "logits/real": -0.7552638649940491, "logps/generated": -617.7360229492188, "logps/real": -133.21524047851562, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -23.75196075439453, "rewards/margins": 23.369274139404297, "rewards/real": -0.38268691301345825, "step": 690 }, { "epoch": 0.45, "learning_rate": 4.726303317535545e-07, "logits/generated": -0.6239826679229736, "logits/real": -0.8113874197006226, "logps/generated": -617.291748046875, "logps/real": -171.48641967773438, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -21.23967742919922, "rewards/margins": 20.85289764404297, "rewards/real": -0.3867819309234619, "step": 700 }, { "epoch": 0.45, "learning_rate": 4.7144549763033177e-07, "logits/generated": -0.5856727361679077, "logits/real": -0.7748730182647705, "logps/generated": -646.7052612304688, "logps/real": -193.67135620117188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -23.783584594726562, "rewards/margins": 23.435470581054688, "rewards/real": -0.34811311960220337, "step": 710 }, { "epoch": 0.46, "learning_rate": 4.70260663507109e-07, "logits/generated": -0.6293947100639343, "logits/real": -0.8080043792724609, "logps/generated": -639.7860107421875, "logps/real": -146.7048797607422, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -23.628271102905273, "rewards/margins": 23.445405960083008, "rewards/real": -0.1828646957874298, "step": 720 }, { "epoch": 0.47, "learning_rate": 4.690758293838862e-07, "logits/generated": -0.615898609161377, "logits/real": -0.7723320722579956, "logps/generated": -677.4393310546875, "logps/real": -144.49502563476562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.529178619384766, "rewards/margins": 26.3712158203125, "rewards/real": -0.1579606533050537, "step": 730 }, { "epoch": 0.47, "learning_rate": 4.678909952606635e-07, "logits/generated": -0.5974934697151184, "logits/real": -0.7126566767692566, "logps/generated": -625.4193115234375, "logps/real": -168.69549560546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -21.015254974365234, "rewards/margins": 20.59768295288086, "rewards/real": -0.4175707697868347, "step": 740 }, { "epoch": 0.48, "learning_rate": 4.667061611374407e-07, "logits/generated": -0.5658475756645203, "logits/real": -0.7219498157501221, "logps/generated": -662.0862426757812, "logps/real": -161.44467163085938, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -24.643749237060547, "rewards/margins": 24.445858001708984, "rewards/real": -0.1978892832994461, "step": 750 }, { "epoch": 0.49, "learning_rate": 4.65521327014218e-07, "logits/generated": -0.5992667078971863, "logits/real": -0.834603488445282, "logps/generated": -645.3001708984375, "logps/real": -129.46719360351562, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -22.823612213134766, "rewards/margins": 22.714466094970703, "rewards/real": -0.10914424806833267, "step": 760 }, { "epoch": 0.49, "learning_rate": 4.6433649289099525e-07, "logits/generated": -0.6031894087791443, "logits/real": -0.8013744354248047, "logps/generated": -621.9285278320312, "logps/real": -142.18630981445312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -23.36435317993164, "rewards/margins": 23.20974349975586, "rewards/real": -0.15460748970508575, "step": 770 }, { "epoch": 0.5, "learning_rate": 4.631516587677725e-07, "logits/generated": -0.6474970579147339, "logits/real": -0.7969701290130615, "logps/generated": -695.6294555664062, "logps/real": -168.54324340820312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -27.67018699645996, "rewards/margins": 27.486658096313477, "rewards/real": -0.1835293024778366, "step": 780 }, { "epoch": 0.51, "learning_rate": 4.6196682464454974e-07, "logits/generated": -0.5945593118667603, "logits/real": -0.8760132789611816, "logps/generated": -632.8075561523438, "logps/real": -137.9114532470703, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -24.00465965270996, "rewards/margins": 23.872516632080078, "rewards/real": -0.13214412331581116, "step": 790 }, { "epoch": 0.51, "learning_rate": 4.60781990521327e-07, "logits/generated": -0.6659427285194397, "logits/real": -0.7805012464523315, "logps/generated": -676.3375244140625, "logps/real": -144.7049102783203, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.78857421875, "rewards/margins": 25.562801361083984, "rewards/real": -0.22577252984046936, "step": 800 }, { "epoch": 0.52, "learning_rate": 4.5959715639810423e-07, "logits/generated": -0.5976084470748901, "logits/real": -0.7444257140159607, "logps/generated": -604.505615234375, "logps/real": -165.31021118164062, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -21.380346298217773, "rewards/margins": 21.084182739257812, "rewards/real": -0.2961658239364624, "step": 810 }, { "epoch": 0.52, "learning_rate": 4.5841232227488145e-07, "logits/generated": -0.6201892495155334, "logits/real": -0.7714813351631165, "logps/generated": -661.017822265625, "logps/real": -157.14991760253906, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -25.09454345703125, "rewards/margins": 24.870752334594727, "rewards/real": -0.22379302978515625, "step": 820 }, { "epoch": 0.53, "learning_rate": 4.5722748815165873e-07, "logits/generated": -0.6445611715316772, "logits/real": -0.8067296147346497, "logps/generated": -723.9563598632812, "logps/real": -160.5393829345703, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -30.98370361328125, "rewards/margins": 30.44954490661621, "rewards/real": -0.5341606736183167, "step": 830 }, { "epoch": 0.54, "learning_rate": 4.56042654028436e-07, "logits/generated": -0.6212409734725952, "logits/real": -0.8002877235412598, "logps/generated": -664.5400390625, "logps/real": -154.18406677246094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -26.307668685913086, "rewards/margins": 25.93606185913086, "rewards/real": -0.37160566449165344, "step": 840 }, { "epoch": 0.54, "learning_rate": 4.5485781990521327e-07, "logits/generated": -0.6696589589118958, "logits/real": -0.872015655040741, "logps/generated": -713.975830078125, "logps/real": -128.1663055419922, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -29.322790145874023, "rewards/margins": 28.88034439086914, "rewards/real": -0.44244661927223206, "step": 850 }, { "epoch": 0.55, "learning_rate": 4.536729857819905e-07, "logits/generated": -0.6352511644363403, "logits/real": -0.8168119192123413, "logps/generated": -690.1238403320312, "logps/real": -147.59390258789062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.160791397094727, "rewards/margins": 28.624774932861328, "rewards/real": -0.5360159873962402, "step": 860 }, { "epoch": 0.56, "learning_rate": 4.5248815165876776e-07, "logits/generated": -0.6255658268928528, "logits/real": -0.7953276634216309, "logps/generated": -707.5949096679688, "logps/real": -164.2091827392578, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -30.47397232055664, "rewards/margins": 29.822368621826172, "rewards/real": -0.6516034007072449, "step": 870 }, { "epoch": 0.56, "learning_rate": 4.5130331753554504e-07, "logits/generated": -0.6212276816368103, "logits/real": -0.7597033381462097, "logps/generated": -692.6005859375, "logps/real": -188.38082885742188, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -30.789087295532227, "rewards/margins": 30.043895721435547, "rewards/real": -0.7451905608177185, "step": 880 }, { "epoch": 0.57, "learning_rate": 4.5011848341232226e-07, "logits/generated": -0.6456987261772156, "logits/real": -0.8051185607910156, "logps/generated": -692.8140258789062, "logps/real": -150.5452880859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -30.668848037719727, "rewards/margins": 30.199214935302734, "rewards/real": -0.46962958574295044, "step": 890 }, { "epoch": 0.58, "learning_rate": 4.489336492890995e-07, "logits/generated": -0.6712831258773804, "logits/real": -0.826252281665802, "logps/generated": -746.2249145507812, "logps/real": -135.2972412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.030811309814453, "rewards/margins": 30.67293930053711, "rewards/real": -0.3578687012195587, "step": 900 }, { "epoch": 0.58, "learning_rate": 4.4774881516587675e-07, "logits/generated": -0.6518301367759705, "logits/real": -0.8644415736198425, "logps/generated": -690.9013671875, "logps/real": -161.36314392089844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -28.964740753173828, "rewards/margins": 28.42722511291504, "rewards/real": -0.5375159978866577, "step": 910 }, { "epoch": 0.59, "learning_rate": 4.46563981042654e-07, "logits/generated": -0.6160604953765869, "logits/real": -0.8334490060806274, "logps/generated": -717.3743286132812, "logps/real": -132.34591674804688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -28.668527603149414, "rewards/margins": 28.09389877319336, "rewards/real": -0.5746307969093323, "step": 920 }, { "epoch": 0.6, "learning_rate": 4.4537914691943124e-07, "logits/generated": -0.6645776033401489, "logits/real": -0.749662458896637, "logps/generated": -705.51708984375, "logps/real": -170.75979614257812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -30.325271606445312, "rewards/margins": 29.850238800048828, "rewards/real": -0.4750315248966217, "step": 930 }, { "epoch": 0.6, "learning_rate": 4.441943127962085e-07, "logits/generated": -0.5848880410194397, "logits/real": -0.7599430084228516, "logps/generated": -679.7612915039062, "logps/real": -162.4516143798828, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -28.77053451538086, "rewards/margins": 28.397497177124023, "rewards/real": -0.37303638458251953, "step": 940 }, { "epoch": 0.61, "learning_rate": 4.430094786729858e-07, "logits/generated": -0.6037660837173462, "logits/real": -0.7843543887138367, "logps/generated": -699.0863037109375, "logps/real": -141.5878143310547, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -30.310083389282227, "rewards/margins": 29.800827026367188, "rewards/real": -0.5092543363571167, "step": 950 }, { "epoch": 0.61, "learning_rate": 4.4182464454976306e-07, "logits/generated": -0.6485855579376221, "logits/real": -0.7831935882568359, "logps/generated": -758.2669677734375, "logps/real": -161.2501220703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -35.414588928222656, "rewards/margins": 34.92060852050781, "rewards/real": -0.49398383498191833, "step": 960 }, { "epoch": 0.62, "learning_rate": 4.4063981042654023e-07, "logits/generated": -0.6142803430557251, "logits/real": -0.8085862398147583, "logps/generated": -758.5242919921875, "logps/real": -143.28065490722656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -35.11386489868164, "rewards/margins": 34.51411056518555, "rewards/real": -0.5997532606124878, "step": 970 }, { "epoch": 0.63, "learning_rate": 4.394549763033175e-07, "logits/generated": -0.6144439578056335, "logits/real": -0.7795756459236145, "logps/generated": -712.9147338867188, "logps/real": -147.69723510742188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -32.72540283203125, "rewards/margins": 32.39826202392578, "rewards/real": -0.3271421492099762, "step": 980 }, { "epoch": 0.63, "learning_rate": 4.382701421800948e-07, "logits/generated": -0.6217916011810303, "logits/real": -0.7831851840019226, "logps/generated": -756.0094604492188, "logps/real": -150.90347290039062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -36.669349670410156, "rewards/margins": 36.12870788574219, "rewards/real": -0.5406419634819031, "step": 990 }, { "epoch": 0.64, "learning_rate": 4.37085308056872e-07, "logits/generated": -0.5746406316757202, "logits/real": -0.7443927526473999, "logps/generated": -721.3018798828125, "logps/real": -167.23497009277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.08510971069336, "rewards/margins": 32.51539993286133, "rewards/real": -0.5697122812271118, "step": 1000 }, { "epoch": 0.65, "learning_rate": 4.3590047393364927e-07, "logits/generated": -0.6204794645309448, "logits/real": -0.8241082429885864, "logps/generated": -771.1573486328125, "logps/real": -142.06658935546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -36.35809326171875, "rewards/margins": 35.951690673828125, "rewards/real": -0.40640267729759216, "step": 1010 }, { "epoch": 0.65, "learning_rate": 4.3471563981042654e-07, "logits/generated": -0.6459885239601135, "logits/real": -0.7648425698280334, "logps/generated": -728.7994384765625, "logps/real": -142.18283081054688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -35.09253692626953, "rewards/margins": 34.53176498413086, "rewards/real": -0.5607694387435913, "step": 1020 }, { "epoch": 0.66, "learning_rate": 4.335308056872038e-07, "logits/generated": -0.6198188066482544, "logits/real": -0.8037668466567993, "logps/generated": -784.0115966796875, "logps/real": -148.4331817626953, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -36.82765579223633, "rewards/margins": 36.30569839477539, "rewards/real": -0.5219635367393494, "step": 1030 }, { "epoch": 0.67, "learning_rate": 4.32345971563981e-07, "logits/generated": -0.5650381445884705, "logits/real": -0.7140682339668274, "logps/generated": -789.3671875, "logps/real": -162.1250762939453, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -37.974632263183594, "rewards/margins": 37.51204299926758, "rewards/real": -0.46258825063705444, "step": 1040 }, { "epoch": 0.67, "learning_rate": 4.3116113744075825e-07, "logits/generated": -0.5569009780883789, "logits/real": -0.6691209077835083, "logps/generated": -727.1697387695312, "logps/real": -149.501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.12350082397461, "rewards/margins": 31.786418914794922, "rewards/real": -0.3370811939239502, "step": 1050 }, { "epoch": 0.68, "learning_rate": 4.299763033175355e-07, "logits/generated": -0.6008241772651672, "logits/real": -0.7835357785224915, "logps/generated": -766.7589111328125, "logps/real": -131.9623260498047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -37.20771026611328, "rewards/margins": 36.767494201660156, "rewards/real": -0.4402230381965637, "step": 1060 }, { "epoch": 0.68, "learning_rate": 4.2879146919431274e-07, "logits/generated": -0.5574159622192383, "logits/real": -0.7532224059104919, "logps/generated": -766.7594604492188, "logps/real": -175.73806762695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -34.75395965576172, "rewards/margins": 34.18846130371094, "rewards/real": -0.565497875213623, "step": 1070 }, { "epoch": 0.69, "learning_rate": 4.2760663507109e-07, "logits/generated": -0.6016499400138855, "logits/real": -0.685789942741394, "logps/generated": -747.66748046875, "logps/real": -175.3254852294922, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.80033874511719, "rewards/margins": 33.334190368652344, "rewards/real": -0.46614784002304077, "step": 1080 }, { "epoch": 0.7, "learning_rate": 4.264218009478673e-07, "logits/generated": -0.514714777469635, "logits/real": -0.729649543762207, "logps/generated": -599.8255615234375, "logps/real": -137.29776000976562, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -21.55307960510254, "rewards/margins": 21.553081512451172, "rewards/real": 2.1871178432775196e-06, "step": 1090 }, { "epoch": 0.7, "learning_rate": 4.2523696682464456e-07, "logits/generated": -0.44834762811660767, "logits/real": -0.662898600101471, "logps/generated": -626.8858642578125, "logps/real": -151.83450317382812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -21.125072479248047, "rewards/margins": 20.952497482299805, "rewards/real": -0.17257389426231384, "step": 1100 }, { "epoch": 0.71, "learning_rate": 4.240521327014218e-07, "logits/generated": -0.5126262307167053, "logits/real": -0.6713369488716125, "logps/generated": -653.6041259765625, "logps/real": -159.56890869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.021862030029297, "rewards/margins": 22.88108253479004, "rewards/real": -0.14077897369861603, "step": 1110 }, { "epoch": 0.72, "learning_rate": 4.22867298578199e-07, "logits/generated": -0.4650425910949707, "logits/real": -0.763433575630188, "logps/generated": -658.9270629882812, "logps/real": -135.75672912597656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.407550811767578, "rewards/margins": 24.228670120239258, "rewards/real": -0.1788794994354248, "step": 1120 }, { "epoch": 0.72, "learning_rate": 4.216824644549763e-07, "logits/generated": -0.4908994138240814, "logits/real": -0.6465893983840942, "logps/generated": -632.4884033203125, "logps/real": -145.31004333496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -21.62966537475586, "rewards/margins": 21.41011619567871, "rewards/real": -0.2195475846529007, "step": 1130 }, { "epoch": 0.73, "learning_rate": 4.2049763033175355e-07, "logits/generated": -0.4437866806983948, "logits/real": -0.6466466188430786, "logps/generated": -626.27294921875, "logps/real": -149.35035705566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -22.251583099365234, "rewards/margins": 22.10491371154785, "rewards/real": -0.1466691941022873, "step": 1140 }, { "epoch": 0.74, "learning_rate": 4.1931279620853077e-07, "logits/generated": -0.47175711393356323, "logits/real": -0.6160026788711548, "logps/generated": -677.9451904296875, "logps/real": -135.2311553955078, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.66263771057129, "rewards/margins": 24.36861801147461, "rewards/real": -0.2940204441547394, "step": 1150 }, { "epoch": 0.74, "learning_rate": 4.1812796208530804e-07, "logits/generated": -0.5612315535545349, "logits/real": -0.6705020666122437, "logps/generated": -676.8372802734375, "logps/real": -168.85000610351562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.101232528686523, "rewards/margins": 26.762081146240234, "rewards/real": -0.3391529619693756, "step": 1160 }, { "epoch": 0.75, "learning_rate": 4.169431279620853e-07, "logits/generated": -0.4795566201210022, "logits/real": -0.6562764644622803, "logps/generated": -666.4014282226562, "logps/real": -148.19837951660156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -25.7773494720459, "rewards/margins": 25.312881469726562, "rewards/real": -0.464468777179718, "step": 1170 }, { "epoch": 0.75, "learning_rate": 4.1575829383886253e-07, "logits/generated": -0.4656401574611664, "logits/real": -0.6328948736190796, "logps/generated": -658.664306640625, "logps/real": -155.73130798339844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.921737670898438, "rewards/margins": 26.695215225219727, "rewards/real": -0.22652335464954376, "step": 1180 }, { "epoch": 0.76, "learning_rate": 4.145734597156398e-07, "logits/generated": -0.4437786936759949, "logits/real": -0.6895097494125366, "logps/generated": -638.5018310546875, "logps/real": -140.78604125976562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -25.6170654296875, "rewards/margins": 25.46429443359375, "rewards/real": -0.15277239680290222, "step": 1190 }, { "epoch": 0.77, "learning_rate": 4.1338862559241703e-07, "logits/generated": -0.4505455493927002, "logits/real": -0.6155862808227539, "logps/generated": -652.1417236328125, "logps/real": -166.3102569580078, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/generated": -25.8651065826416, "rewards/margins": 25.632049560546875, "rewards/real": -0.2330542355775833, "step": 1200 }, { "epoch": 0.77, "learning_rate": 4.122037914691943e-07, "logits/generated": -0.5029697418212891, "logits/real": -0.6487875580787659, "logps/generated": -634.719482421875, "logps/real": -166.7958526611328, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -22.444910049438477, "rewards/margins": 22.197734832763672, "rewards/real": -0.24717314541339874, "step": 1210 }, { "epoch": 0.78, "learning_rate": 4.110189573459715e-07, "logits/generated": -0.4739890992641449, "logits/real": -0.7358786463737488, "logps/generated": -627.636474609375, "logps/real": -128.64334106445312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -23.257991790771484, "rewards/margins": 23.142520904541016, "rewards/real": -0.11547265946865082, "step": 1220 }, { "epoch": 0.79, "learning_rate": 4.098341232227488e-07, "logits/generated": -0.49049538373947144, "logits/real": -0.7324908971786499, "logps/generated": -697.52392578125, "logps/real": -150.76388549804688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -27.034936904907227, "rewards/margins": 26.84500503540039, "rewards/real": -0.18993662297725677, "step": 1230 }, { "epoch": 0.79, "learning_rate": 4.0864928909952607e-07, "logits/generated": -0.5197226405143738, "logits/real": -0.732746958732605, "logps/generated": -697.392333984375, "logps/real": -138.52279663085938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.976947784423828, "rewards/margins": 27.898412704467773, "rewards/real": -0.0785362496972084, "step": 1240 }, { "epoch": 0.8, "learning_rate": 4.074644549763033e-07, "logits/generated": -0.5259883403778076, "logits/real": -0.6905041933059692, "logps/generated": -631.3746337890625, "logps/real": -171.46287536621094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -23.118450164794922, "rewards/margins": 23.003276824951172, "rewards/real": -0.11517591774463654, "step": 1250 }, { "epoch": 0.81, "learning_rate": 4.0627962085308056e-07, "logits/generated": -0.4914798140525818, "logits/real": -0.6694945096969604, "logps/generated": -665.9325561523438, "logps/real": -152.16909790039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -26.65480613708496, "rewards/margins": 26.53061294555664, "rewards/real": -0.12419945001602173, "step": 1260 }, { "epoch": 0.81, "learning_rate": 4.0509478672985783e-07, "logits/generated": -0.435981810092926, "logits/real": -0.6437792181968689, "logps/generated": -639.2318115234375, "logps/real": -139.5286102294922, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -23.267459869384766, "rewards/margins": 23.242870330810547, "rewards/real": -0.024588558822870255, "step": 1270 }, { "epoch": 0.82, "learning_rate": 4.0390995260663505e-07, "logits/generated": -0.46243348717689514, "logits/real": -0.6304915547370911, "logps/generated": -671.2921142578125, "logps/real": -151.30206298828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -25.752239227294922, "rewards/margins": 25.52545738220215, "rewards/real": -0.2267828732728958, "step": 1280 }, { "epoch": 0.83, "learning_rate": 4.0272511848341227e-07, "logits/generated": -0.4131905436515808, "logits/real": -0.6179688572883606, "logps/generated": -685.4735107421875, "logps/real": -166.12875366210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.9171085357666, "rewards/margins": 27.653793334960938, "rewards/real": -0.2633177638053894, "step": 1290 }, { "epoch": 0.83, "learning_rate": 4.0154028436018954e-07, "logits/generated": -0.45457887649536133, "logits/real": -0.7321020364761353, "logps/generated": -687.7710571289062, "logps/real": -118.0494384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.879419326782227, "rewards/margins": 27.65741539001465, "rewards/real": -0.22200465202331543, "step": 1300 }, { "epoch": 0.84, "learning_rate": 4.003554502369668e-07, "logits/generated": -0.4731278419494629, "logits/real": -0.6649892926216125, "logps/generated": -702.4985961914062, "logps/real": -164.13600158691406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.949352264404297, "rewards/margins": 29.615692138671875, "rewards/real": -0.33366328477859497, "step": 1310 }, { "epoch": 0.84, "learning_rate": 3.991706161137441e-07, "logits/generated": -0.5154431462287903, "logits/real": -0.6099938750267029, "logps/generated": -733.9407958984375, "logps/real": -174.83822631835938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.6252384185791, "rewards/margins": 31.47017478942871, "rewards/real": -0.15506593883037567, "step": 1320 }, { "epoch": 0.85, "learning_rate": 3.979857819905213e-07, "logits/generated": -0.47972407937049866, "logits/real": -0.7196077108383179, "logps/generated": -695.8734741210938, "logps/real": -144.66249084472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.702346801757812, "rewards/margins": 27.56471824645996, "rewards/real": -0.13762858510017395, "step": 1330 }, { "epoch": 0.86, "learning_rate": 3.968009478672986e-07, "logits/generated": -0.406221866607666, "logits/real": -0.6687533259391785, "logps/generated": -655.7437744140625, "logps/real": -135.68966674804688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -28.703540802001953, "rewards/margins": 28.489782333374023, "rewards/real": -0.21375396847724915, "step": 1340 }, { "epoch": 0.86, "learning_rate": 3.9561611374407585e-07, "logits/generated": -0.45106711983680725, "logits/real": -0.6919107437133789, "logps/generated": -715.7987060546875, "logps/real": -138.26852416992188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -32.545223236083984, "rewards/margins": 32.39795684814453, "rewards/real": -0.1472676545381546, "step": 1350 }, { "epoch": 0.87, "learning_rate": 3.94431279620853e-07, "logits/generated": -0.48046213388442993, "logits/real": -0.5451101064682007, "logps/generated": -697.7542724609375, "logps/real": -152.2278594970703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.348682403564453, "rewards/margins": 29.207202911376953, "rewards/real": -0.14147798717021942, "step": 1360 }, { "epoch": 0.88, "learning_rate": 3.932464454976303e-07, "logits/generated": -0.4254804253578186, "logits/real": -0.6588962078094482, "logps/generated": -705.3590087890625, "logps/real": -152.607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.081212997436523, "rewards/margins": 30.919677734375, "rewards/real": -0.16153457760810852, "step": 1370 }, { "epoch": 0.88, "learning_rate": 3.9206161137440757e-07, "logits/generated": -0.48913320899009705, "logits/real": -0.6368371248245239, "logps/generated": -745.2406005859375, "logps/real": -166.21762084960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.84128952026367, "rewards/margins": 33.531410217285156, "rewards/real": -0.30988219380378723, "step": 1380 }, { "epoch": 0.89, "learning_rate": 3.9087677725118484e-07, "logits/generated": -0.4334734380245209, "logits/real": -0.5950613021850586, "logps/generated": -713.3173217773438, "logps/real": -172.47543334960938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.074203491210938, "rewards/margins": 30.784961700439453, "rewards/real": -0.28924185037612915, "step": 1390 }, { "epoch": 0.9, "learning_rate": 3.8969194312796206e-07, "logits/generated": -0.49943074584007263, "logits/real": -0.6621376276016235, "logps/generated": -750.9681396484375, "logps/real": -154.4829864501953, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.702816009521484, "rewards/margins": 32.558631896972656, "rewards/real": -0.14418402314186096, "step": 1400 }, { "epoch": 0.9, "learning_rate": 3.8850710900473933e-07, "logits/generated": -0.4399910867214203, "logits/real": -0.6627537608146667, "logps/generated": -712.4768676757812, "logps/real": -149.22299194335938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.581218719482422, "rewards/margins": 30.372350692749023, "rewards/real": -0.2088705599308014, "step": 1410 }, { "epoch": 0.91, "learning_rate": 3.873222748815166e-07, "logits/generated": -0.4090496897697449, "logits/real": -0.6347898244857788, "logps/generated": -691.2296752929688, "logps/real": -159.6715545654297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -28.685245513916016, "rewards/margins": 28.553665161132812, "rewards/real": -0.13157956302165985, "step": 1420 }, { "epoch": 0.91, "learning_rate": 3.8613744075829377e-07, "logits/generated": -0.44337087869644165, "logits/real": -0.7070174813270569, "logps/generated": -725.4908447265625, "logps/real": -135.2198486328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -31.43515396118164, "rewards/margins": 31.318073272705078, "rewards/real": -0.11708203703165054, "step": 1430 }, { "epoch": 0.92, "learning_rate": 3.8495260663507104e-07, "logits/generated": -0.45473846793174744, "logits/real": -0.7236835360527039, "logps/generated": -727.946533203125, "logps/real": -142.0230712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.88336944580078, "rewards/margins": 33.610408782958984, "rewards/real": -0.27295243740081787, "step": 1440 }, { "epoch": 0.93, "learning_rate": 3.837677725118483e-07, "logits/generated": -0.5566205978393555, "logits/real": -0.7040198445320129, "logps/generated": -782.2391357421875, "logps/real": -159.59271240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -36.38234329223633, "rewards/margins": 36.12818908691406, "rewards/real": -0.2541573643684387, "step": 1450 }, { "epoch": 0.93, "learning_rate": 3.825829383886256e-07, "logits/generated": -0.4843382239341736, "logits/real": -0.7185007929801941, "logps/generated": -782.1573486328125, "logps/real": -133.78878784179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.17876434326172, "rewards/margins": 36.1375846862793, "rewards/real": -0.04117864370346069, "step": 1460 }, { "epoch": 0.94, "learning_rate": 3.813981042654028e-07, "logits/generated": -0.527305006980896, "logits/real": -0.750108540058136, "logps/generated": -724.2720947265625, "logps/real": -165.68260192871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -35.12436294555664, "rewards/margins": 34.905982971191406, "rewards/real": -0.21837835013866425, "step": 1470 }, { "epoch": 0.95, "learning_rate": 3.802132701421801e-07, "logits/generated": -0.4532243609428406, "logits/real": -0.7372425198554993, "logps/generated": -707.7957763671875, "logps/real": -111.7388687133789, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.10491943359375, "rewards/margins": 30.893756866455078, "rewards/real": -0.21116304397583008, "step": 1480 }, { "epoch": 0.95, "learning_rate": 3.7902843601895736e-07, "logits/generated": -0.5261486768722534, "logits/real": -0.7524106502532959, "logps/generated": -750.0657958984375, "logps/real": -141.4464111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.03588104248047, "rewards/margins": 35.844032287597656, "rewards/real": -0.19184735417366028, "step": 1490 }, { "epoch": 0.96, "learning_rate": 3.778436018957346e-07, "logits/generated": -0.49053382873535156, "logits/real": -0.6678867936134338, "logps/generated": -723.454833984375, "logps/real": -158.5665283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.78173828125, "rewards/margins": 32.50086975097656, "rewards/real": -0.280868798494339, "step": 1500 }, { "epoch": 0.97, "learning_rate": 3.766587677725118e-07, "logits/generated": -0.5047518014907837, "logits/real": -0.7079204320907593, "logps/generated": -738.470947265625, "logps/real": -146.69248962402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.998992919921875, "rewards/margins": 33.81964111328125, "rewards/real": -0.1793525069952011, "step": 1510 }, { "epoch": 0.97, "learning_rate": 3.7547393364928907e-07, "logits/generated": -0.47372421622276306, "logits/real": -0.7369820475578308, "logps/generated": -785.177978515625, "logps/real": -148.13267517089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.820003509521484, "rewards/margins": 36.730655670166016, "rewards/real": -0.08935005962848663, "step": 1520 }, { "epoch": 0.98, "learning_rate": 3.7428909952606634e-07, "logits/generated": -0.5118182897567749, "logits/real": -0.6915109753608704, "logps/generated": -767.2000732421875, "logps/real": -178.1002655029297, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -36.287994384765625, "rewards/margins": 35.94424057006836, "rewards/real": -0.3437514007091522, "step": 1530 }, { "epoch": 0.99, "learning_rate": 3.7310426540284356e-07, "logits/generated": -0.48033565282821655, "logits/real": -0.6938971281051636, "logps/generated": -790.1136474609375, "logps/real": -139.7992401123047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -37.80128860473633, "rewards/margins": 37.25532531738281, "rewards/real": -0.5459665060043335, "step": 1540 }, { "epoch": 0.99, "learning_rate": 3.7191943127962083e-07, "logits/generated": -0.48637381196022034, "logits/real": -0.6838294863700867, "logps/generated": -765.7503662109375, "logps/real": -158.60995483398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -35.00304412841797, "rewards/margins": 34.73870086669922, "rewards/real": -0.26434019207954407, "step": 1550 }, { "epoch": 1.0, "learning_rate": 3.707345971563981e-07, "logits/generated": -0.44851940870285034, "logits/real": -0.6709171533584595, "logps/generated": -771.0245971679688, "logps/real": -168.75152587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -38.06503677368164, "rewards/margins": 37.67142105102539, "rewards/real": -0.3936167359352112, "step": 1560 }, { "epoch": 1.0, "learning_rate": 3.695497630331754e-07, "logits/generated": -0.45538201928138733, "logits/real": -0.6043254733085632, "logps/generated": -774.0435791015625, "logps/real": -145.94451904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.95838928222656, "rewards/margins": 36.758235931396484, "rewards/real": -0.2001533955335617, "step": 1570 }, { "epoch": 1.01, "learning_rate": 3.683649289099526e-07, "logits/generated": -0.4710386395454407, "logits/real": -0.6742110252380371, "logps/generated": -798.041259765625, "logps/real": -149.87484741210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -38.644805908203125, "rewards/margins": 38.51628875732422, "rewards/real": -0.12851884961128235, "step": 1580 }, { "epoch": 1.02, "learning_rate": 3.671800947867298e-07, "logits/generated": -0.4763055741786957, "logits/real": -0.6764456629753113, "logps/generated": -791.0558471679688, "logps/real": -157.50120544433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -38.532691955566406, "rewards/margins": 38.26321029663086, "rewards/real": -0.26947957277297974, "step": 1590 }, { "epoch": 1.02, "learning_rate": 3.659952606635071e-07, "logits/generated": -0.47692328691482544, "logits/real": -0.606033980846405, "logps/generated": -754.9681396484375, "logps/real": -148.64236450195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -35.7038459777832, "rewards/margins": 35.1637077331543, "rewards/real": -0.5401372313499451, "step": 1600 }, { "epoch": 1.03, "learning_rate": 3.648104265402843e-07, "logits/generated": -0.49335426092147827, "logits/real": -0.68101966381073, "logps/generated": -862.1978759765625, "logps/real": -143.81234741210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -43.46797180175781, "rewards/margins": 43.25313186645508, "rewards/real": -0.21484307944774628, "step": 1610 }, { "epoch": 1.04, "learning_rate": 3.636255924170616e-07, "logits/generated": -0.4993807375431061, "logits/real": -0.6376734972000122, "logps/generated": -830.0358276367188, "logps/real": -173.48251342773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -42.75886154174805, "rewards/margins": 42.36219787597656, "rewards/real": -0.3966585397720337, "step": 1620 }, { "epoch": 1.04, "learning_rate": 3.6244075829383886e-07, "logits/generated": -0.4894142746925354, "logits/real": -0.671286940574646, "logps/generated": -791.9283447265625, "logps/real": -157.88449096679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -37.980499267578125, "rewards/margins": 37.63560104370117, "rewards/real": -0.34489426016807556, "step": 1630 }, { "epoch": 1.05, "learning_rate": 3.6125592417061613e-07, "logits/generated": -0.4308968484401703, "logits/real": -0.6662777066230774, "logps/generated": -815.94970703125, "logps/real": -132.9452667236328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.20854187011719, "rewards/margins": 40.93267059326172, "rewards/real": -0.275868684053421, "step": 1640 }, { "epoch": 1.06, "learning_rate": 3.6007109004739335e-07, "logits/generated": -0.44717854261398315, "logits/real": -0.6490769982337952, "logps/generated": -752.9622192382812, "logps/real": -155.48178100585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.214019775390625, "rewards/margins": 35.96308135986328, "rewards/real": -0.25093746185302734, "step": 1650 }, { "epoch": 1.06, "learning_rate": 3.588862559241706e-07, "logits/generated": -0.47657886147499084, "logits/real": -0.6330237984657288, "logps/generated": -787.4442749023438, "logps/real": -159.59701538085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.82368850708008, "rewards/margins": 36.52547073364258, "rewards/real": -0.2982181906700134, "step": 1660 }, { "epoch": 1.07, "learning_rate": 3.5770142180094784e-07, "logits/generated": -0.4685605466365814, "logits/real": -0.6297181844711304, "logps/generated": -827.0250244140625, "logps/real": -150.63694763183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.97758483886719, "rewards/margins": 41.71283721923828, "rewards/real": -0.26475200057029724, "step": 1670 }, { "epoch": 1.07, "learning_rate": 3.5651658767772506e-07, "logits/generated": -0.45342230796813965, "logits/real": -0.6486082673072815, "logps/generated": -749.4054565429688, "logps/real": -126.46002197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -35.42198944091797, "rewards/margins": 35.160091400146484, "rewards/real": -0.26189571619033813, "step": 1680 }, { "epoch": 1.08, "learning_rate": 3.5533175355450234e-07, "logits/generated": -0.4470803141593933, "logits/real": -0.5735016465187073, "logps/generated": -766.5286865234375, "logps/real": -174.92886352539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -39.0324592590332, "rewards/margins": 38.663352966308594, "rewards/real": -0.3691008687019348, "step": 1690 }, { "epoch": 1.09, "learning_rate": 3.541469194312796e-07, "logits/generated": -0.47693657875061035, "logits/real": -0.7134417295455933, "logps/generated": -820.4762573242188, "logps/real": -122.05989837646484, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -43.139793395996094, "rewards/margins": 42.87862014770508, "rewards/real": -0.26117831468582153, "step": 1700 }, { "epoch": 1.09, "learning_rate": 3.529620853080569e-07, "logits/generated": -0.45882320404052734, "logits/real": -0.664508044719696, "logps/generated": -831.1781005859375, "logps/real": -147.54002380371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.169921875, "rewards/margins": 40.772850036621094, "rewards/real": -0.39707642793655396, "step": 1710 }, { "epoch": 1.1, "learning_rate": 3.517772511848341e-07, "logits/generated": -0.4301510453224182, "logits/real": -0.6589769124984741, "logps/generated": -819.22119140625, "logps/real": -183.15982055664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.374855041503906, "rewards/margins": 41.15515899658203, "rewards/real": -0.2196962833404541, "step": 1720 }, { "epoch": 1.11, "learning_rate": 3.505924170616114e-07, "logits/generated": -0.4489319920539856, "logits/real": -0.6075456738471985, "logps/generated": -850.15673828125, "logps/real": -150.83221435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -44.46028137207031, "rewards/margins": 43.98347473144531, "rewards/real": -0.47680991888046265, "step": 1730 }, { "epoch": 1.11, "learning_rate": 3.4940758293838865e-07, "logits/generated": -0.4293234944343567, "logits/real": -0.7189976572990417, "logps/generated": -835.0768432617188, "logps/real": -131.94656372070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -42.448909759521484, "rewards/margins": 42.11590576171875, "rewards/real": -0.3330024182796478, "step": 1740 }, { "epoch": 1.12, "learning_rate": 3.482227488151658e-07, "logits/generated": -0.41782283782958984, "logits/real": -0.5898563265800476, "logps/generated": -835.0525512695312, "logps/real": -165.47747802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -43.286441802978516, "rewards/margins": 42.87544631958008, "rewards/real": -0.4109969735145569, "step": 1750 }, { "epoch": 1.13, "learning_rate": 3.470379146919431e-07, "logits/generated": -0.4614785313606262, "logits/real": -0.5493655204772949, "logps/generated": -817.9602661132812, "logps/real": -168.73268127441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -40.68376159667969, "rewards/margins": 40.22270202636719, "rewards/real": -0.4610599875450134, "step": 1760 }, { "epoch": 1.13, "learning_rate": 3.4585308056872036e-07, "logits/generated": -0.4272429347038269, "logits/real": -0.5022194981575012, "logps/generated": -787.2926635742188, "logps/real": -167.05831909179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -38.47704315185547, "rewards/margins": 38.09776306152344, "rewards/real": -0.37928327918052673, "step": 1770 }, { "epoch": 1.14, "learning_rate": 3.4466824644549763e-07, "logits/generated": -0.4905944764614105, "logits/real": -0.6622756719589233, "logps/generated": -805.285400390625, "logps/real": -178.21536254882812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.157222747802734, "rewards/margins": 40.712562561035156, "rewards/real": -0.4446594715118408, "step": 1780 }, { "epoch": 1.15, "learning_rate": 3.4348341232227485e-07, "logits/generated": -0.4158329367637634, "logits/real": -0.6186385750770569, "logps/generated": -749.114501953125, "logps/real": -156.46290588378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.60091781616211, "rewards/margins": 36.17322540283203, "rewards/real": -0.4276936650276184, "step": 1790 }, { "epoch": 1.15, "learning_rate": 3.422985781990521e-07, "logits/generated": -0.4725651144981384, "logits/real": -0.652617335319519, "logps/generated": -847.3025512695312, "logps/real": -150.90065002441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -43.543460845947266, "rewards/margins": 42.95781326293945, "rewards/real": -0.5856472253799438, "step": 1800 }, { "epoch": 1.16, "learning_rate": 3.411137440758294e-07, "logits/generated": -0.4408513605594635, "logits/real": -0.6833234429359436, "logps/generated": -768.7931518554688, "logps/real": -134.6144256591797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -37.13344955444336, "rewards/margins": 36.78771209716797, "rewards/real": -0.3457415997982025, "step": 1810 }, { "epoch": 1.16, "learning_rate": 3.3992890995260667e-07, "logits/generated": -0.4581897258758545, "logits/real": -0.595461368560791, "logps/generated": -827.2222900390625, "logps/real": -174.0118408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -42.74081039428711, "rewards/margins": 42.331790924072266, "rewards/real": -0.40901678800582886, "step": 1820 }, { "epoch": 1.17, "learning_rate": 3.3874407582938384e-07, "logits/generated": -0.45837849378585815, "logits/real": -0.6876403093338013, "logps/generated": -833.8259887695312, "logps/real": -141.63064575195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.820953369140625, "rewards/margins": 41.39267349243164, "rewards/real": -0.42827802896499634, "step": 1830 }, { "epoch": 1.18, "learning_rate": 3.375592417061611e-07, "logits/generated": -0.48998793959617615, "logits/real": -0.6868919730186462, "logps/generated": -858.2042846679688, "logps/real": -145.3782196044922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -44.131046295166016, "rewards/margins": 43.59005355834961, "rewards/real": -0.5409911870956421, "step": 1840 }, { "epoch": 1.18, "learning_rate": 3.363744075829384e-07, "logits/generated": -0.4100268483161926, "logits/real": -0.694664478302002, "logps/generated": -809.3135375976562, "logps/real": -166.88694763183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.03974151611328, "rewards/margins": 40.51675033569336, "rewards/real": -0.5229931473731995, "step": 1850 }, { "epoch": 1.19, "learning_rate": 3.351895734597156e-07, "logits/generated": -0.49493294954299927, "logits/real": -0.6615623235702515, "logps/generated": -918.6854248046875, "logps/real": -144.638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.01395797729492, "rewards/margins": 51.5162467956543, "rewards/real": -0.4977096915245056, "step": 1860 }, { "epoch": 1.2, "learning_rate": 3.340047393364929e-07, "logits/generated": -0.3965403735637665, "logits/real": -0.6068152189254761, "logps/generated": -797.3876342773438, "logps/real": -138.4888458251953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -39.271629333496094, "rewards/margins": 38.95609664916992, "rewards/real": -0.3155314326286316, "step": 1870 }, { "epoch": 1.2, "learning_rate": 3.3281990521327015e-07, "logits/generated": -0.43841552734375, "logits/real": -0.5667222738265991, "logps/generated": -804.7280883789062, "logps/real": -173.30206298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -39.95454788208008, "rewards/margins": 39.456111907958984, "rewards/real": -0.49843597412109375, "step": 1880 }, { "epoch": 1.21, "learning_rate": 3.316350710900474e-07, "logits/generated": -0.38486871123313904, "logits/real": -0.5955111384391785, "logps/generated": -779.2044677734375, "logps/real": -138.0015869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -38.943260192871094, "rewards/margins": 38.430908203125, "rewards/real": -0.5123514533042908, "step": 1890 }, { "epoch": 1.22, "learning_rate": 3.304502369668246e-07, "logits/generated": -0.4344411790370941, "logits/real": -0.662503182888031, "logps/generated": -789.8323974609375, "logps/real": -138.18624877929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -39.464324951171875, "rewards/margins": 39.04804229736328, "rewards/real": -0.41627994179725647, "step": 1900 }, { "epoch": 1.22, "learning_rate": 3.2926540284360186e-07, "logits/generated": -0.40550222992897034, "logits/real": -0.6265038251876831, "logps/generated": -832.6241455078125, "logps/real": -186.16464233398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -42.117034912109375, "rewards/margins": 41.634342193603516, "rewards/real": -0.48269376158714294, "step": 1910 }, { "epoch": 1.23, "learning_rate": 3.2808056872037913e-07, "logits/generated": -0.4908333718776703, "logits/real": -0.6979160904884338, "logps/generated": -800.664794921875, "logps/real": -144.0208740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -39.349517822265625, "rewards/margins": 38.85806655883789, "rewards/real": -0.491449773311615, "step": 1920 }, { "epoch": 1.23, "learning_rate": 3.2689573459715635e-07, "logits/generated": -0.4737107753753662, "logits/real": -0.6124163866043091, "logps/generated": -789.6759643554688, "logps/real": -173.1675262451172, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.43712615966797, "rewards/margins": 40.75090789794922, "rewards/real": -0.6862186193466187, "step": 1930 }, { "epoch": 1.24, "learning_rate": 3.2571090047393363e-07, "logits/generated": -0.4662472605705261, "logits/real": -0.7021108865737915, "logps/generated": -912.7789916992188, "logps/real": -144.39654541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -50.74687576293945, "rewards/margins": 50.47743606567383, "rewards/real": -0.26943859457969666, "step": 1940 }, { "epoch": 1.25, "learning_rate": 3.245260663507109e-07, "logits/generated": -0.5851739645004272, "logits/real": -0.708136260509491, "logps/generated": -838.8018798828125, "logps/real": -151.24722290039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -44.7834358215332, "rewards/margins": 44.43694305419922, "rewards/real": -0.3464917838573456, "step": 1950 }, { "epoch": 1.25, "learning_rate": 3.2334123222748817e-07, "logits/generated": -0.5089236497879028, "logits/real": -0.6847448945045471, "logps/generated": -894.1632690429688, "logps/real": -151.0913543701172, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -48.116729736328125, "rewards/margins": 47.81326675415039, "rewards/real": -0.3034594655036926, "step": 1960 }, { "epoch": 1.26, "learning_rate": 3.221563981042654e-07, "logits/generated": -0.4521718919277191, "logits/real": -0.609528660774231, "logps/generated": -829.3768310546875, "logps/real": -167.9632568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -43.617408752441406, "rewards/margins": 43.08763885498047, "rewards/real": -0.5297662019729614, "step": 1970 }, { "epoch": 1.27, "learning_rate": 3.209715639810426e-07, "logits/generated": -0.5104943513870239, "logits/real": -0.6578128933906555, "logps/generated": -897.2025146484375, "logps/real": -146.1173858642578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -50.29200744628906, "rewards/margins": 49.76136016845703, "rewards/real": -0.5306479930877686, "step": 1980 }, { "epoch": 1.27, "learning_rate": 3.197867298578199e-07, "logits/generated": -0.453556627035141, "logits/real": -0.6318106651306152, "logps/generated": -901.5166015625, "logps/real": -137.2063751220703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -49.143409729003906, "rewards/margins": 48.74793243408203, "rewards/real": -0.3954845070838928, "step": 1990 }, { "epoch": 1.28, "learning_rate": 3.186018957345971e-07, "logits/generated": -0.530498743057251, "logits/real": -0.6793403029441833, "logps/generated": -878.6456909179688, "logps/real": -138.333251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.53958511352539, "rewards/margins": 47.25537872314453, "rewards/real": -0.2842068076133728, "step": 2000 }, { "epoch": 1.29, "learning_rate": 3.174170616113744e-07, "logits/generated": -0.5185251832008362, "logits/real": -0.6888160109519958, "logps/generated": -882.78076171875, "logps/real": -149.84170532226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -46.70854187011719, "rewards/margins": 46.299781799316406, "rewards/real": -0.40876227617263794, "step": 2010 }, { "epoch": 1.29, "learning_rate": 3.1623222748815165e-07, "logits/generated": -0.5407160520553589, "logits/real": -0.6995197534561157, "logps/generated": -925.1043701171875, "logps/real": -172.48684692382812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.484397888183594, "rewards/margins": 52.23331832885742, "rewards/real": -0.2510821521282196, "step": 2020 }, { "epoch": 1.3, "learning_rate": 3.150473933649289e-07, "logits/generated": -0.47896209359169006, "logits/real": -0.648679792881012, "logps/generated": -855.0648193359375, "logps/real": -140.78317260742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -44.74834442138672, "rewards/margins": 44.42583084106445, "rewards/real": -0.3225128650665283, "step": 2030 }, { "epoch": 1.31, "learning_rate": 3.1386255924170614e-07, "logits/generated": -0.48660707473754883, "logits/real": -0.6823971271514893, "logps/generated": -855.0968627929688, "logps/real": -149.41519165039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -45.171939849853516, "rewards/margins": 44.90800857543945, "rewards/real": -0.26393207907676697, "step": 2040 }, { "epoch": 1.31, "learning_rate": 3.126777251184834e-07, "logits/generated": -0.4488789141178131, "logits/real": -0.6946064233779907, "logps/generated": -834.7445068359375, "logps/real": -149.4184112548828, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -44.987037658691406, "rewards/margins": 44.69217300415039, "rewards/real": -0.29486605525016785, "step": 2050 }, { "epoch": 1.32, "learning_rate": 3.1149289099526064e-07, "logits/generated": -0.46628251671791077, "logits/real": -0.6745079159736633, "logps/generated": -957.2546997070312, "logps/real": -154.1484832763672, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.21331024169922, "rewards/margins": 52.879371643066406, "rewards/real": -0.33394068479537964, "step": 2060 }, { "epoch": 1.32, "learning_rate": 3.103080568720379e-07, "logits/generated": -0.49304255843162537, "logits/real": -0.6859273314476013, "logps/generated": -900.3800659179688, "logps/real": -166.83229064941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -48.62682342529297, "rewards/margins": 48.23986053466797, "rewards/real": -0.3869660794734955, "step": 2070 }, { "epoch": 1.33, "learning_rate": 3.0912322274881513e-07, "logits/generated": -0.532124400138855, "logits/real": -0.6791267991065979, "logps/generated": -867.7174072265625, "logps/real": -173.84207153320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -45.860843658447266, "rewards/margins": 45.43999481201172, "rewards/real": -0.4208555817604065, "step": 2080 }, { "epoch": 1.34, "learning_rate": 3.079383886255924e-07, "logits/generated": -0.5052396655082703, "logits/real": -0.6648889780044556, "logps/generated": -909.2999267578125, "logps/real": -178.1378631591797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.73103332519531, "rewards/margins": 47.32225799560547, "rewards/real": -0.40877556800842285, "step": 2090 }, { "epoch": 1.34, "learning_rate": 3.067535545023697e-07, "logits/generated": -0.5442999601364136, "logits/real": -0.7468653917312622, "logps/generated": -960.0681762695312, "logps/real": -151.3079071044922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -56.67901611328125, "rewards/margins": 56.42626953125, "rewards/real": -0.25274744629859924, "step": 2100 }, { "epoch": 1.35, "learning_rate": 3.055687203791469e-07, "logits/generated": -0.49901169538497925, "logits/real": -0.6442614793777466, "logps/generated": -879.4744873046875, "logps/real": -180.48049926757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.23583221435547, "rewards/margins": 46.76594161987305, "rewards/real": -0.4698910713195801, "step": 2110 }, { "epoch": 1.36, "learning_rate": 3.0438388625592417e-07, "logits/generated": -0.4474611282348633, "logits/real": -0.6742789149284363, "logps/generated": -857.6492309570312, "logps/real": -136.08782958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -43.95075988769531, "rewards/margins": 43.52191925048828, "rewards/real": -0.4288388788700104, "step": 2120 }, { "epoch": 1.36, "learning_rate": 3.0319905213270144e-07, "logits/generated": -0.42652368545532227, "logits/real": -0.6252545118331909, "logps/generated": -812.95361328125, "logps/real": -163.1314239501953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -42.40178680419922, "rewards/margins": 42.024620056152344, "rewards/real": -0.37716203927993774, "step": 2130 }, { "epoch": 1.37, "learning_rate": 3.0201421800947866e-07, "logits/generated": -0.41971296072006226, "logits/real": -0.6229659914970398, "logps/generated": -934.0114135742188, "logps/real": -141.24195861816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -51.96159744262695, "rewards/margins": 51.59962844848633, "rewards/real": -0.3619686961174011, "step": 2140 }, { "epoch": 1.38, "learning_rate": 3.008293838862559e-07, "logits/generated": -0.5068638324737549, "logits/real": -0.7758525609970093, "logps/generated": -907.1189575195312, "logps/real": -138.0588836669922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.612152099609375, "rewards/margins": 47.226993560791016, "rewards/real": -0.38516414165496826, "step": 2150 }, { "epoch": 1.38, "learning_rate": 2.9964454976303315e-07, "logits/generated": -0.4722062945365906, "logits/real": -0.6450417041778564, "logps/generated": -845.0633544921875, "logps/real": -173.4315185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -44.63590621948242, "rewards/margins": 44.2160530090332, "rewards/real": -0.41985201835632324, "step": 2160 }, { "epoch": 1.39, "learning_rate": 2.984597156398104e-07, "logits/generated": -0.49884462356567383, "logits/real": -0.6271129846572876, "logps/generated": -924.2891845703125, "logps/real": -160.28036499023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.93656539916992, "rewards/margins": 52.53998947143555, "rewards/real": -0.3965730667114258, "step": 2170 }, { "epoch": 1.39, "learning_rate": 2.9727488151658765e-07, "logits/generated": -0.48375964164733887, "logits/real": -0.7125850915908813, "logps/generated": -971.89404296875, "logps/real": -155.76739501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.2780876159668, "rewards/margins": 56.74763870239258, "rewards/real": -0.5304462909698486, "step": 2180 }, { "epoch": 1.4, "learning_rate": 2.960900473933649e-07, "logits/generated": -0.5001789927482605, "logits/real": -0.6946722269058228, "logps/generated": -875.4529418945312, "logps/real": -136.772216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -45.757354736328125, "rewards/margins": 45.39958953857422, "rewards/real": -0.3577651083469391, "step": 2190 }, { "epoch": 1.41, "learning_rate": 2.949052132701422e-07, "logits/generated": -0.481309711933136, "logits/real": -0.6437762975692749, "logps/generated": -897.0853271484375, "logps/real": -148.08895874023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -48.52891540527344, "rewards/margins": 48.114437103271484, "rewards/real": -0.4144725203514099, "step": 2200 }, { "epoch": 1.41, "learning_rate": 2.9372037914691946e-07, "logits/generated": -0.505331814289093, "logits/real": -0.7198413014411926, "logps/generated": -943.2364501953125, "logps/real": -126.28971099853516, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.058265686035156, "rewards/margins": 51.653160095214844, "rewards/real": -0.4051007330417633, "step": 2210 }, { "epoch": 1.42, "learning_rate": 2.9253554502369663e-07, "logits/generated": -0.4623163342475891, "logits/real": -0.6992497444152832, "logps/generated": -924.9318237304688, "logps/real": -165.45114135742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.68879318237305, "rewards/margins": 52.19450759887695, "rewards/real": -0.4942806363105774, "step": 2220 }, { "epoch": 1.43, "learning_rate": 2.913507109004739e-07, "logits/generated": -0.4906153082847595, "logits/real": -0.6900730729103088, "logps/generated": -922.6204223632812, "logps/real": -129.15237426757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -51.038394927978516, "rewards/margins": 50.75798797607422, "rewards/real": -0.2804059386253357, "step": 2230 }, { "epoch": 1.43, "learning_rate": 2.901658767772512e-07, "logits/generated": -0.4813242554664612, "logits/real": -0.7062429785728455, "logps/generated": -821.6672973632812, "logps/real": -163.94393920898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -43.79729461669922, "rewards/margins": 43.21052932739258, "rewards/real": -0.5867670178413391, "step": 2240 }, { "epoch": 1.44, "learning_rate": 2.889810426540284e-07, "logits/generated": -0.43650323152542114, "logits/real": -0.723192572593689, "logps/generated": -875.97412109375, "logps/real": -134.3582000732422, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -48.2892951965332, "rewards/margins": 47.99460220336914, "rewards/real": -0.2946951985359192, "step": 2250 }, { "epoch": 1.45, "learning_rate": 2.8779620853080567e-07, "logits/generated": -0.4641779065132141, "logits/real": -0.6070187091827393, "logps/generated": -904.2113037109375, "logps/real": -158.0858612060547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -51.96845245361328, "rewards/margins": 51.64441680908203, "rewards/real": -0.3240307569503784, "step": 2260 }, { "epoch": 1.45, "learning_rate": 2.8661137440758294e-07, "logits/generated": -0.543385922908783, "logits/real": -0.6664692163467407, "logps/generated": -886.740234375, "logps/real": -158.68768310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -48.016876220703125, "rewards/margins": 47.88544464111328, "rewards/real": -0.13143035769462585, "step": 2270 }, { "epoch": 1.46, "learning_rate": 2.854265402843602e-07, "logits/generated": -0.5325735807418823, "logits/real": -0.6880441904067993, "logps/generated": -889.3059692382812, "logps/real": -143.56271362304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -48.709747314453125, "rewards/margins": 48.41456604003906, "rewards/real": -0.29517900943756104, "step": 2280 }, { "epoch": 1.47, "learning_rate": 2.842417061611374e-07, "logits/generated": -0.5220402479171753, "logits/real": -0.7008036375045776, "logps/generated": -901.8049926757812, "logps/real": -151.60629272460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.60210037231445, "rewards/margins": 47.190391540527344, "rewards/real": -0.4117053151130676, "step": 2290 }, { "epoch": 1.47, "learning_rate": 2.8305687203791465e-07, "logits/generated": -0.5935906171798706, "logits/real": -0.8247605562210083, "logps/generated": -907.3389892578125, "logps/real": -138.0072479248047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -49.43927001953125, "rewards/margins": 49.20110321044922, "rewards/real": -0.23817138373851776, "step": 2300 }, { "epoch": 1.48, "learning_rate": 2.8187203791469193e-07, "logits/generated": -0.5039738416671753, "logits/real": -0.7532294988632202, "logps/generated": -940.0494995117188, "logps/real": -136.12376403808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -55.96204376220703, "rewards/margins": 55.6636962890625, "rewards/real": -0.2983424961566925, "step": 2310 }, { "epoch": 1.48, "learning_rate": 2.806872037914692e-07, "logits/generated": -0.5363454818725586, "logits/real": -0.6708102822303772, "logps/generated": -924.869140625, "logps/real": -158.69607543945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -50.98878860473633, "rewards/margins": 50.51224899291992, "rewards/real": -0.4765354096889496, "step": 2320 }, { "epoch": 1.49, "learning_rate": 2.795023696682464e-07, "logits/generated": -0.5462719202041626, "logits/real": -0.6856478452682495, "logps/generated": -908.8020629882812, "logps/real": -161.13491821289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -50.132118225097656, "rewards/margins": 49.841583251953125, "rewards/real": -0.2905333936214447, "step": 2330 }, { "epoch": 1.5, "learning_rate": 2.783175355450237e-07, "logits/generated": -0.5182799696922302, "logits/real": -0.7454923391342163, "logps/generated": -868.3385009765625, "logps/real": -131.3604736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.998130798339844, "rewards/margins": 47.67443084716797, "rewards/real": -0.32370421290397644, "step": 2340 }, { "epoch": 1.5, "learning_rate": 2.7713270142180097e-07, "logits/generated": -0.46355119347572327, "logits/real": -0.6795281171798706, "logps/generated": -893.9700927734375, "logps/real": -150.07882690429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -49.319942474365234, "rewards/margins": 48.991004943847656, "rewards/real": -0.3289386034011841, "step": 2350 }, { "epoch": 1.51, "learning_rate": 2.759478672985782e-07, "logits/generated": -0.477322518825531, "logits/real": -0.6399216055870056, "logps/generated": -928.3748779296875, "logps/real": -148.43809509277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.18648147583008, "rewards/margins": 53.1119499206543, "rewards/real": -0.07453130185604095, "step": 2360 }, { "epoch": 1.52, "learning_rate": 2.747630331753554e-07, "logits/generated": -0.47234511375427246, "logits/real": -0.5880690217018127, "logps/generated": -861.2603759765625, "logps/real": -168.40231323242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.96990203857422, "rewards/margins": 47.553489685058594, "rewards/real": -0.4164124131202698, "step": 2370 }, { "epoch": 1.52, "learning_rate": 2.735781990521327e-07, "logits/generated": -0.4614683985710144, "logits/real": -0.5570347309112549, "logps/generated": -906.6154174804688, "logps/real": -196.1412811279297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -49.976844787597656, "rewards/margins": 49.53584671020508, "rewards/real": -0.44100189208984375, "step": 2380 }, { "epoch": 1.53, "learning_rate": 2.7239336492890995e-07, "logits/generated": -0.49024948477745056, "logits/real": -0.6667122840881348, "logps/generated": -1002.1686401367188, "logps/real": -143.6676788330078, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.17181396484375, "rewards/margins": 58.65046310424805, "rewards/real": -0.521342933177948, "step": 2390 }, { "epoch": 1.54, "learning_rate": 2.7120853080568717e-07, "logits/generated": -0.4796825051307678, "logits/real": -0.701050877571106, "logps/generated": -962.9171752929688, "logps/real": -143.58999633789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.36942672729492, "rewards/margins": 54.066436767578125, "rewards/real": -0.30298811197280884, "step": 2400 }, { "epoch": 1.54, "learning_rate": 2.7002369668246444e-07, "logits/generated": -0.49393147230148315, "logits/real": -0.6975389719009399, "logps/generated": -1020.3138427734375, "logps/real": -122.83097839355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.38806915283203, "rewards/margins": 60.000091552734375, "rewards/real": -0.3879725933074951, "step": 2410 }, { "epoch": 1.55, "learning_rate": 2.688388625592417e-07, "logits/generated": -0.4902682900428772, "logits/real": -0.6860643625259399, "logps/generated": -1019.5838012695312, "logps/real": -162.5839080810547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.252159118652344, "rewards/margins": 60.7586669921875, "rewards/real": -0.49349674582481384, "step": 2420 }, { "epoch": 1.55, "learning_rate": 2.6765402843601894e-07, "logits/generated": -0.45780739188194275, "logits/real": -0.6357568502426147, "logps/generated": -1011.2120971679688, "logps/real": -139.41915893554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.23823165893555, "rewards/margins": 58.670433044433594, "rewards/real": -0.567794919013977, "step": 2430 }, { "epoch": 1.56, "learning_rate": 2.664691943127962e-07, "logits/generated": -0.4731730818748474, "logits/real": -0.7192245721817017, "logps/generated": -993.3472900390625, "logps/real": -147.27291870117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.850547790527344, "rewards/margins": 57.4837532043457, "rewards/real": -0.3668076992034912, "step": 2440 }, { "epoch": 1.57, "learning_rate": 2.6528436018957343e-07, "logits/generated": -0.44004377722740173, "logits/real": -0.6283164620399475, "logps/generated": -987.6051025390625, "logps/real": -150.0260009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.029502868652344, "rewards/margins": 58.68867111206055, "rewards/real": -0.34083858132362366, "step": 2450 }, { "epoch": 1.57, "learning_rate": 2.640995260663507e-07, "logits/generated": -0.49537092447280884, "logits/real": -0.7183640003204346, "logps/generated": -1005.8214721679688, "logps/real": -139.62448120117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.904502868652344, "rewards/margins": 57.10654830932617, "rewards/real": -0.7979534864425659, "step": 2460 }, { "epoch": 1.58, "learning_rate": 2.629146919431279e-07, "logits/generated": -0.4345122277736664, "logits/real": -0.6478680968284607, "logps/generated": -1020.6052856445312, "logps/real": -167.0375213623047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.1839599609375, "rewards/margins": 59.437049865722656, "rewards/real": -0.7469125986099243, "step": 2470 }, { "epoch": 1.59, "learning_rate": 2.617298578199052e-07, "logits/generated": -0.5310551524162292, "logits/real": -0.6770363450050354, "logps/generated": -997.2672119140625, "logps/real": -142.95449829101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.05806350708008, "rewards/margins": 59.3278923034668, "rewards/real": -0.7301737666130066, "step": 2480 }, { "epoch": 1.59, "learning_rate": 2.6054502369668247e-07, "logits/generated": -0.49158763885498047, "logits/real": -0.7231532335281372, "logps/generated": -1001.833984375, "logps/real": -135.50466918945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.535316467285156, "rewards/margins": 60.20942306518555, "rewards/real": -0.3258832097053528, "step": 2490 }, { "epoch": 1.6, "learning_rate": 2.5936018957345974e-07, "logits/generated": -0.4792296886444092, "logits/real": -0.7686340808868408, "logps/generated": -935.7185668945312, "logps/real": -144.28750610351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -50.79018020629883, "rewards/margins": 50.41889190673828, "rewards/real": -0.37129008769989014, "step": 2500 }, { "epoch": 1.61, "learning_rate": 2.5817535545023696e-07, "logits/generated": -0.45573297142982483, "logits/real": -0.6030322909355164, "logps/generated": -961.43896484375, "logps/real": -175.4803466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.09404754638672, "rewards/margins": 57.536399841308594, "rewards/real": -0.557651698589325, "step": 2510 }, { "epoch": 1.61, "learning_rate": 2.5699052132701423e-07, "logits/generated": -0.4555717408657074, "logits/real": -0.6126461625099182, "logps/generated": -971.4552612304688, "logps/real": -152.1224822998047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.64960479736328, "rewards/margins": 56.96089553833008, "rewards/real": -0.6887052655220032, "step": 2520 }, { "epoch": 1.62, "learning_rate": 2.5580568720379145e-07, "logits/generated": -0.37829676270484924, "logits/real": -0.6520699262619019, "logps/generated": -1022.7066650390625, "logps/real": -145.3723602294922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -64.4852294921875, "rewards/margins": 64.01722717285156, "rewards/real": -0.46799802780151367, "step": 2530 }, { "epoch": 1.63, "learning_rate": 2.5462085308056867e-07, "logits/generated": -0.4396567940711975, "logits/real": -0.6784273982048035, "logps/generated": -990.3728637695312, "logps/real": -128.4115447998047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.081809997558594, "rewards/margins": 57.558265686035156, "rewards/real": -0.523552417755127, "step": 2540 }, { "epoch": 1.63, "learning_rate": 2.5343601895734595e-07, "logits/generated": -0.45570698380470276, "logits/real": -0.6596937775611877, "logps/generated": -939.7443237304688, "logps/real": -153.6055145263672, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.053611755371094, "rewards/margins": 52.287078857421875, "rewards/real": -0.7665325403213501, "step": 2550 }, { "epoch": 1.64, "learning_rate": 2.522511848341232e-07, "logits/generated": -0.46676602959632874, "logits/real": -0.6501291394233704, "logps/generated": -935.5755615234375, "logps/real": -158.39944458007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.2735481262207, "rewards/margins": 52.781654357910156, "rewards/real": -0.491886705160141, "step": 2560 }, { "epoch": 1.64, "learning_rate": 2.510663507109005e-07, "logits/generated": -0.40136367082595825, "logits/real": -0.6050557494163513, "logps/generated": -951.0081176757812, "logps/real": -167.49505615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -54.12383270263672, "rewards/margins": 53.68292236328125, "rewards/real": -0.4409194886684418, "step": 2570 }, { "epoch": 1.65, "learning_rate": 2.498815165876777e-07, "logits/generated": -0.42623743414878845, "logits/real": -0.5959832668304443, "logps/generated": -1012.42724609375, "logps/real": -158.5472412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.254615783691406, "rewards/margins": 58.894569396972656, "rewards/real": -0.3600441813468933, "step": 2580 }, { "epoch": 1.66, "learning_rate": 2.48696682464455e-07, "logits/generated": -0.43621063232421875, "logits/real": -0.6673040390014648, "logps/generated": -950.0224609375, "logps/real": -131.24407958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -55.44426727294922, "rewards/margins": 54.97258758544922, "rewards/real": -0.47168129682540894, "step": 2590 }, { "epoch": 1.66, "learning_rate": 2.475118483412322e-07, "logits/generated": -0.4405759871006012, "logits/real": -0.6971568465232849, "logps/generated": -993.9846801757812, "logps/real": -135.9408416748047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.888824462890625, "rewards/margins": 57.3767204284668, "rewards/real": -0.5121084451675415, "step": 2600 }, { "epoch": 1.67, "learning_rate": 2.463270142180095e-07, "logits/generated": -0.43571940064430237, "logits/real": -0.5744475722312927, "logps/generated": -1015.73095703125, "logps/real": -158.0771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.4149055480957, "rewards/margins": 60.82659149169922, "rewards/real": -0.5883184671401978, "step": 2610 }, { "epoch": 1.68, "learning_rate": 2.451421800947867e-07, "logits/generated": -0.4443763196468353, "logits/real": -0.6300617456436157, "logps/generated": -948.5537109375, "logps/real": -138.87777709960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -53.80131149291992, "rewards/margins": 53.32474899291992, "rewards/real": -0.4765622019767761, "step": 2620 }, { "epoch": 1.68, "learning_rate": 2.4395734597156397e-07, "logits/generated": -0.4608997702598572, "logits/real": -0.64490807056427, "logps/generated": -968.9459228515625, "logps/real": -141.3875732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -56.779457092285156, "rewards/margins": 56.22953414916992, "rewards/real": -0.5499221682548523, "step": 2630 }, { "epoch": 1.69, "learning_rate": 2.4277251184834124e-07, "logits/generated": -0.4236997663974762, "logits/real": -0.6387981176376343, "logps/generated": -1061.720947265625, "logps/real": -142.55154418945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.07857513427734, "rewards/margins": 64.67992401123047, "rewards/real": -0.39864128828048706, "step": 2640 }, { "epoch": 1.7, "learning_rate": 2.4158767772511846e-07, "logits/generated": -0.4931492805480957, "logits/real": -0.6415807008743286, "logps/generated": -936.0066528320312, "logps/real": -148.57513427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.94257736206055, "rewards/margins": 52.371917724609375, "rewards/real": -0.5706599950790405, "step": 2650 }, { "epoch": 1.7, "learning_rate": 2.4040284360189573e-07, "logits/generated": -0.463541179895401, "logits/real": -0.642052412033081, "logps/generated": -989.3040771484375, "logps/real": -163.30599975585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -58.35044479370117, "rewards/margins": 57.896202087402344, "rewards/real": -0.4542439877986908, "step": 2660 }, { "epoch": 1.71, "learning_rate": 2.39218009478673e-07, "logits/generated": -0.43307337164878845, "logits/real": -0.7004517316818237, "logps/generated": -1077.06884765625, "logps/real": -144.35037231445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -64.49039459228516, "rewards/margins": 63.91597366333008, "rewards/real": -0.5744192004203796, "step": 2670 }, { "epoch": 1.71, "learning_rate": 2.3803317535545023e-07, "logits/generated": -0.4095051884651184, "logits/real": -0.6760072112083435, "logps/generated": -931.9710693359375, "logps/real": -144.9515380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -55.00127029418945, "rewards/margins": 54.52852249145508, "rewards/real": -0.4727482795715332, "step": 2680 }, { "epoch": 1.72, "learning_rate": 2.3684834123222747e-07, "logits/generated": -0.4472767412662506, "logits/real": -0.5491870641708374, "logps/generated": -999.7566528320312, "logps/real": -166.29904174804688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.471107482910156, "rewards/margins": 58.70185089111328, "rewards/real": -0.7692559361457825, "step": 2690 }, { "epoch": 1.73, "learning_rate": 2.3566350710900475e-07, "logits/generated": -0.41070666909217834, "logits/real": -0.5636172890663147, "logps/generated": -1065.25537109375, "logps/real": -148.3241729736328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.23228454589844, "rewards/margins": 66.70288848876953, "rewards/real": -0.5294026732444763, "step": 2700 }, { "epoch": 1.73, "learning_rate": 2.3447867298578197e-07, "logits/generated": -0.46938830614089966, "logits/real": -0.667069137096405, "logps/generated": -994.6066284179688, "logps/real": -160.06761169433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.15541458129883, "rewards/margins": 59.66267013549805, "rewards/real": -0.49274301528930664, "step": 2710 }, { "epoch": 1.74, "learning_rate": 2.3329383886255924e-07, "logits/generated": -0.4746991991996765, "logits/real": -0.6093307733535767, "logps/generated": -992.4873046875, "logps/real": -160.92660522460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.234031677246094, "rewards/margins": 58.680206298828125, "rewards/real": -0.5538274049758911, "step": 2720 }, { "epoch": 1.75, "learning_rate": 2.3210900473933649e-07, "logits/generated": -0.4014422297477722, "logits/real": -0.6751469373703003, "logps/generated": -929.3016357421875, "logps/real": -150.0366973876953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -55.02751922607422, "rewards/margins": 54.57123565673828, "rewards/real": -0.4562840461730957, "step": 2730 }, { "epoch": 1.75, "learning_rate": 2.3092417061611373e-07, "logits/generated": -0.44085001945495605, "logits/real": -0.6542856693267822, "logps/generated": -1020.89404296875, "logps/real": -162.69509887695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.03368377685547, "rewards/margins": 61.63750076293945, "rewards/real": -0.3961876928806305, "step": 2740 }, { "epoch": 1.76, "learning_rate": 2.2973933649289098e-07, "logits/generated": -0.45564159750938416, "logits/real": -0.701114296913147, "logps/generated": -1048.5789794921875, "logps/real": -124.47825622558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.25930786132812, "rewards/margins": 64.69044494628906, "rewards/real": -0.5688632130622864, "step": 2750 }, { "epoch": 1.77, "learning_rate": 2.2855450236966822e-07, "logits/generated": -0.5452786087989807, "logits/real": -0.6592291593551636, "logps/generated": -1069.452880859375, "logps/real": -159.27734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.70073699951172, "rewards/margins": 65.43223571777344, "rewards/real": -0.26849886775016785, "step": 2760 }, { "epoch": 1.77, "learning_rate": 2.273696682464455e-07, "logits/generated": -0.46791744232177734, "logits/real": -0.6787043213844299, "logps/generated": -861.5823974609375, "logps/real": -147.97743225097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -45.79942321777344, "rewards/margins": 45.491188049316406, "rewards/real": -0.30823782086372375, "step": 2770 }, { "epoch": 1.78, "learning_rate": 2.2618483412322272e-07, "logits/generated": -0.41990095376968384, "logits/real": -0.6832523345947266, "logps/generated": -932.8555908203125, "logps/real": -131.0704345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -52.52861785888672, "rewards/margins": 52.190757751464844, "rewards/real": -0.3378532826900482, "step": 2780 }, { "epoch": 1.79, "learning_rate": 2.25e-07, "logits/generated": -0.4100368916988373, "logits/real": -0.5835294723510742, "logps/generated": -1038.96630859375, "logps/real": -184.62283325195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.66301727294922, "rewards/margins": 62.26544952392578, "rewards/real": -0.39757412672042847, "step": 2790 }, { "epoch": 1.79, "learning_rate": 2.2381516587677724e-07, "logits/generated": -0.44541412591934204, "logits/real": -0.6417989730834961, "logps/generated": -942.4816284179688, "logps/real": -173.33139038085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -55.49482345581055, "rewards/margins": 55.00482177734375, "rewards/real": -0.49000295996665955, "step": 2800 }, { "epoch": 1.8, "learning_rate": 2.226303317535545e-07, "logits/generated": -0.44918951392173767, "logits/real": -0.6756407618522644, "logps/generated": -1055.998779296875, "logps/real": -164.0789337158203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -64.55057525634766, "rewards/margins": 63.80998992919922, "rewards/real": -0.7405800819396973, "step": 2810 }, { "epoch": 1.8, "learning_rate": 2.2144549763033173e-07, "logits/generated": -0.3896518051624298, "logits/real": -0.6300621628761292, "logps/generated": -971.3762817382812, "logps/real": -116.8947525024414, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -56.8530158996582, "rewards/margins": 56.00217819213867, "rewards/real": -0.8508402705192566, "step": 2820 }, { "epoch": 1.81, "learning_rate": 2.20260663507109e-07, "logits/generated": -0.40912383794784546, "logits/real": -0.571279764175415, "logps/generated": -1047.6051025390625, "logps/real": -176.30783081054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.89017486572266, "rewards/margins": 67.03627014160156, "rewards/real": -0.8538981676101685, "step": 2830 }, { "epoch": 1.82, "learning_rate": 2.1907582938388625e-07, "logits/generated": -0.3686346113681793, "logits/real": -0.6175917387008667, "logps/generated": -1123.7552490234375, "logps/real": -157.916748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.57915496826172, "rewards/margins": 69.48426055908203, "rewards/real": -1.0949029922485352, "step": 2840 }, { "epoch": 1.82, "learning_rate": 2.178909952606635e-07, "logits/generated": -0.4202900826931, "logits/real": -0.6336459517478943, "logps/generated": -1137.767822265625, "logps/real": -124.10030364990234, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.68309783935547, "rewards/margins": 70.78590393066406, "rewards/real": -0.8971970677375793, "step": 2850 }, { "epoch": 1.83, "learning_rate": 2.1670616113744074e-07, "logits/generated": -0.3994109034538269, "logits/real": -0.634803831577301, "logps/generated": -1005.7017822265625, "logps/real": -169.7325439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.44282150268555, "rewards/margins": 58.54929733276367, "rewards/real": -0.8935245275497437, "step": 2860 }, { "epoch": 1.84, "learning_rate": 2.15521327014218e-07, "logits/generated": -0.3749557137489319, "logits/real": -0.6287773251533508, "logps/generated": -1105.1219482421875, "logps/real": -147.91397094726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.544677734375, "rewards/margins": 68.75887298583984, "rewards/real": -0.785801351070404, "step": 2870 }, { "epoch": 1.84, "learning_rate": 2.1433649289099526e-07, "logits/generated": -0.4119029641151428, "logits/real": -0.5460438132286072, "logps/generated": -1043.18310546875, "logps/real": -165.08152770996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.2020263671875, "rewards/margins": 62.2581672668457, "rewards/real": -0.9438508749008179, "step": 2880 }, { "epoch": 1.85, "learning_rate": 2.131516587677725e-07, "logits/generated": -0.40759310126304626, "logits/real": -0.5606673955917358, "logps/generated": -1038.422119140625, "logps/real": -154.99227905273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.8538818359375, "rewards/margins": 61.08037567138672, "rewards/real": -0.77350914478302, "step": 2890 }, { "epoch": 1.86, "learning_rate": 2.1196682464454975e-07, "logits/generated": -0.40789279341697693, "logits/real": -0.6496740579605103, "logps/generated": -1112.929931640625, "logps/real": -132.43563842773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.68193054199219, "rewards/margins": 69.83362579345703, "rewards/real": -0.8483074903488159, "step": 2900 }, { "epoch": 1.86, "learning_rate": 2.10781990521327e-07, "logits/generated": -0.37787383794784546, "logits/real": -0.5792855024337769, "logps/generated": -992.4094848632812, "logps/real": -154.80380249023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.6590461730957, "rewards/margins": 56.728843688964844, "rewards/real": -0.9301955103874207, "step": 2910 }, { "epoch": 1.87, "learning_rate": 2.0959715639810427e-07, "logits/generated": -0.4275107979774475, "logits/real": -0.6507991552352905, "logps/generated": -1179.656982421875, "logps/real": -151.2890167236328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.20372009277344, "rewards/margins": 75.1134262084961, "rewards/real": -1.0902981758117676, "step": 2920 }, { "epoch": 1.87, "learning_rate": 2.0841232227488152e-07, "logits/generated": -0.4114235043525696, "logits/real": -0.6667225360870361, "logps/generated": -1071.714599609375, "logps/real": -133.47251892089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.06950378417969, "rewards/margins": 67.27735137939453, "rewards/real": -0.7921562790870667, "step": 2930 }, { "epoch": 1.88, "learning_rate": 2.0722748815165874e-07, "logits/generated": -0.41574984788894653, "logits/real": -0.6365878582000732, "logps/generated": -1119.65087890625, "logps/real": -142.26431274414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.78968811035156, "rewards/margins": 70.04450988769531, "rewards/real": -0.745174765586853, "step": 2940 }, { "epoch": 1.89, "learning_rate": 2.06042654028436e-07, "logits/generated": -0.37902599573135376, "logits/real": -0.5311517119407654, "logps/generated": -1179.3172607421875, "logps/real": -160.26150512695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.18898010253906, "rewards/margins": 72.45357513427734, "rewards/real": -0.7354053258895874, "step": 2950 }, { "epoch": 1.89, "learning_rate": 2.0485781990521326e-07, "logits/generated": -0.3815317153930664, "logits/real": -0.5580254793167114, "logps/generated": -1023.4977416992188, "logps/real": -177.36837768554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.73035430908203, "rewards/margins": 61.690452575683594, "rewards/real": -1.0398961305618286, "step": 2960 }, { "epoch": 1.9, "learning_rate": 2.0367298578199053e-07, "logits/generated": -0.44472736120224, "logits/real": -0.6714332699775696, "logps/generated": -1140.576416015625, "logps/real": -164.40725708007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.26954650878906, "rewards/margins": 71.59504699707031, "rewards/real": -0.6745188236236572, "step": 2970 }, { "epoch": 1.91, "learning_rate": 2.0248815165876775e-07, "logits/generated": -0.4064870774745941, "logits/real": -0.6100367903709412, "logps/generated": -1150.54833984375, "logps/real": -160.94125366210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.72151184082031, "rewards/margins": 74.99349975585938, "rewards/real": -0.7280232906341553, "step": 2980 }, { "epoch": 1.91, "learning_rate": 2.0130331753554502e-07, "logits/generated": -0.4334731698036194, "logits/real": -0.6273586750030518, "logps/generated": -1114.251220703125, "logps/real": -167.9752655029297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.68013000488281, "rewards/margins": 68.87593078613281, "rewards/real": -0.8041984438896179, "step": 2990 }, { "epoch": 1.92, "learning_rate": 2.0011848341232227e-07, "logits/generated": -0.37390297651290894, "logits/real": -0.5925924777984619, "logps/generated": -1171.7125244140625, "logps/real": -151.46539306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.30975341796875, "rewards/margins": 75.49763488769531, "rewards/real": -0.8121153712272644, "step": 3000 }, { "epoch": 1.93, "learning_rate": 1.9893364928909952e-07, "logits/generated": -0.3266315758228302, "logits/real": -0.5822888612747192, "logps/generated": -1063.568115234375, "logps/real": -150.86004638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -64.58686828613281, "rewards/margins": 63.89935302734375, "rewards/real": -0.6875194311141968, "step": 3010 }, { "epoch": 1.93, "learning_rate": 1.9774881516587676e-07, "logits/generated": -0.38277262449264526, "logits/real": -0.5801711678504944, "logps/generated": -1175.657470703125, "logps/real": -141.32138061523438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.63087463378906, "rewards/margins": 76.90750122070312, "rewards/real": -0.7233678102493286, "step": 3020 }, { "epoch": 1.94, "learning_rate": 1.96563981042654e-07, "logits/generated": -0.35185354948043823, "logits/real": -0.6176477670669556, "logps/generated": -1025.0194091796875, "logps/real": -130.98770141601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.04143524169922, "rewards/margins": 61.38140106201172, "rewards/real": -0.6600403189659119, "step": 3030 }, { "epoch": 1.94, "learning_rate": 1.9537914691943128e-07, "logits/generated": -0.40409189462661743, "logits/real": -0.596308708190918, "logps/generated": -1096.9437255859375, "logps/real": -167.45077514648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.10643768310547, "rewards/margins": 67.09183502197266, "rewards/real": -1.014599084854126, "step": 3040 }, { "epoch": 1.95, "learning_rate": 1.9419431279620853e-07, "logits/generated": -0.356533408164978, "logits/real": -0.5840550661087036, "logps/generated": -1120.258056640625, "logps/real": -153.67636108398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.5401840209961, "rewards/margins": 70.77323913574219, "rewards/real": -0.7669495940208435, "step": 3050 }, { "epoch": 1.96, "learning_rate": 1.9300947867298577e-07, "logits/generated": -0.39099499583244324, "logits/real": -0.5445195436477661, "logps/generated": -1096.966552734375, "logps/real": -185.20242309570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.42916107177734, "rewards/margins": 67.67522430419922, "rewards/real": -0.7539411187171936, "step": 3060 }, { "epoch": 1.96, "learning_rate": 1.9182464454976302e-07, "logits/generated": -0.3674711287021637, "logits/real": -0.6313090920448303, "logps/generated": -998.6416015625, "logps/real": -132.3099365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -59.564292907714844, "rewards/margins": 58.874427795410156, "rewards/real": -0.6898680329322815, "step": 3070 }, { "epoch": 1.97, "learning_rate": 1.906398104265403e-07, "logits/generated": -0.3422473669052124, "logits/real": -0.5261892676353455, "logps/generated": -1169.02880859375, "logps/real": -149.87451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.00128173828125, "rewards/margins": 75.43621063232422, "rewards/real": -0.5650706887245178, "step": 3080 }, { "epoch": 1.98, "learning_rate": 1.8945497630331754e-07, "logits/generated": -0.40371593832969666, "logits/real": -0.5681861042976379, "logps/generated": -989.8043823242188, "logps/real": -161.1966552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.001930236816406, "rewards/margins": 59.34397506713867, "rewards/real": -0.6579534411430359, "step": 3090 }, { "epoch": 1.98, "learning_rate": 1.8827014218009476e-07, "logits/generated": -0.3659510016441345, "logits/real": -0.5965005159378052, "logps/generated": -1124.320068359375, "logps/real": -151.7841033935547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.14144897460938, "rewards/margins": 72.28865051269531, "rewards/real": -0.8528071641921997, "step": 3100 }, { "epoch": 1.99, "learning_rate": 1.8708530805687203e-07, "logits/generated": -0.33026427030563354, "logits/real": -0.5088328719139099, "logps/generated": -1115.7996826171875, "logps/real": -135.47061157226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.01557159423828, "rewards/margins": 70.09529113769531, "rewards/real": -0.9202736020088196, "step": 3110 }, { "epoch": 2.0, "learning_rate": 1.8590047393364928e-07, "logits/generated": -0.35965341329574585, "logits/real": -0.5917715430259705, "logps/generated": -1188.393310546875, "logps/real": -164.9335174560547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -79.32763671875, "rewards/margins": 78.70462036132812, "rewards/real": -0.623020589351654, "step": 3120 }, { "epoch": 2.0, "learning_rate": 1.8471563981042655e-07, "logits/generated": -0.3648239076137543, "logits/real": -0.5664030313491821, "logps/generated": -1057.7745361328125, "logps/real": -164.2882080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.43244171142578, "rewards/margins": 64.76323699951172, "rewards/real": -0.6691963076591492, "step": 3130 }, { "epoch": 2.01, "learning_rate": 1.8353080568720377e-07, "logits/generated": -0.2945231795310974, "logits/real": -0.539868950843811, "logps/generated": -1135.911865234375, "logps/real": -126.64066314697266, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.74871063232422, "rewards/margins": 74.0860595703125, "rewards/real": -0.6626566648483276, "step": 3140 }, { "epoch": 2.02, "learning_rate": 1.8234597156398104e-07, "logits/generated": -0.32069313526153564, "logits/real": -0.5126243829727173, "logps/generated": -1169.849365234375, "logps/real": -147.9727783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.32138061523438, "rewards/margins": 75.6849136352539, "rewards/real": -0.6364642381668091, "step": 3150 }, { "epoch": 2.02, "learning_rate": 1.811611374407583e-07, "logits/generated": -0.3039155900478363, "logits/real": -0.5434777140617371, "logps/generated": -1120.1385498046875, "logps/real": -150.61309814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.19499206542969, "rewards/margins": 71.50361633300781, "rewards/real": -0.6913769841194153, "step": 3160 }, { "epoch": 2.03, "learning_rate": 1.7997630331753554e-07, "logits/generated": -0.33049115538597107, "logits/real": -0.5453459620475769, "logps/generated": -1100.5694580078125, "logps/real": -168.0435791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.0591812133789, "rewards/margins": 69.451904296875, "rewards/real": -0.6072803139686584, "step": 3170 }, { "epoch": 2.03, "learning_rate": 1.7879146919431278e-07, "logits/generated": -0.3514329493045807, "logits/real": -0.5001763105392456, "logps/generated": -1134.4805908203125, "logps/real": -180.07357788085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.09115600585938, "rewards/margins": 72.2973403930664, "rewards/real": -0.793817400932312, "step": 3180 }, { "epoch": 2.04, "learning_rate": 1.7760663507109003e-07, "logits/generated": -0.27006787061691284, "logits/real": -0.4615755081176758, "logps/generated": -1064.807861328125, "logps/real": -157.30935668945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.96697998046875, "rewards/margins": 67.21857452392578, "rewards/real": -0.7484084963798523, "step": 3190 }, { "epoch": 2.05, "learning_rate": 1.764218009478673e-07, "logits/generated": -0.33314403891563416, "logits/real": -0.5476816892623901, "logps/generated": -1177.116943359375, "logps/real": -133.23471069335938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -78.40288543701172, "rewards/margins": 77.6571044921875, "rewards/real": -0.7457820177078247, "step": 3200 }, { "epoch": 2.05, "learning_rate": 1.7523696682464452e-07, "logits/generated": -0.3331630825996399, "logits/real": -0.5389949083328247, "logps/generated": -1134.275146484375, "logps/real": -167.80189514160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.55030059814453, "rewards/margins": 69.66114807128906, "rewards/real": -0.8891481161117554, "step": 3210 }, { "epoch": 2.06, "learning_rate": 1.740521327014218e-07, "logits/generated": -0.3793897330760956, "logits/real": -0.5896502733230591, "logps/generated": -1118.945068359375, "logps/real": -142.81280517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.79044342041016, "rewards/margins": 71.0679702758789, "rewards/real": -0.7224776148796082, "step": 3220 }, { "epoch": 2.07, "learning_rate": 1.7286729857819904e-07, "logits/generated": -0.34748396277427673, "logits/real": -0.5819012522697449, "logps/generated": -1131.2802734375, "logps/real": -144.39434814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.58968353271484, "rewards/margins": 70.8386001586914, "rewards/real": -0.7510883212089539, "step": 3230 }, { "epoch": 2.07, "learning_rate": 1.7168246445497631e-07, "logits/generated": -0.35045960545539856, "logits/real": -0.5766857862472534, "logps/generated": -1157.075927734375, "logps/real": -183.49752807617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.64195251464844, "rewards/margins": 73.91766357421875, "rewards/real": -0.7242866158485413, "step": 3240 }, { "epoch": 2.08, "learning_rate": 1.7049763033175353e-07, "logits/generated": -0.36225640773773193, "logits/real": -0.6122447848320007, "logps/generated": -1231.66064453125, "logps/real": -159.02023315429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -81.8363037109375, "rewards/margins": 81.19940185546875, "rewards/real": -0.636897623538971, "step": 3250 }, { "epoch": 2.09, "learning_rate": 1.693127962085308e-07, "logits/generated": -0.3631829619407654, "logits/real": -0.5321738123893738, "logps/generated": -1131.0858154296875, "logps/real": -174.25485229492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.96878051757812, "rewards/margins": 70.31332397460938, "rewards/real": -0.6554469466209412, "step": 3260 }, { "epoch": 2.09, "learning_rate": 1.6812796208530805e-07, "logits/generated": -0.2980991005897522, "logits/real": -0.4874647557735443, "logps/generated": -1105.4827880859375, "logps/real": -152.40386962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -66.46055603027344, "rewards/margins": 65.54535675048828, "rewards/real": -0.9151935577392578, "step": 3270 }, { "epoch": 2.1, "learning_rate": 1.669431279620853e-07, "logits/generated": -0.28592294454574585, "logits/real": -0.4887320101261139, "logps/generated": -1115.578369140625, "logps/real": -185.43649291992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.00923156738281, "rewards/margins": 69.16118621826172, "rewards/real": -0.8480373620986938, "step": 3280 }, { "epoch": 2.1, "learning_rate": 1.6575829383886255e-07, "logits/generated": -0.37043094635009766, "logits/real": -0.579189658164978, "logps/generated": -1242.7139892578125, "logps/real": -150.39120483398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -83.98506927490234, "rewards/margins": 83.05586242675781, "rewards/real": -0.9292176961898804, "step": 3290 }, { "epoch": 2.11, "learning_rate": 1.645734597156398e-07, "logits/generated": -0.28565752506256104, "logits/real": -0.5570210218429565, "logps/generated": -1072.3984375, "logps/real": -139.39593505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.31278991699219, "rewards/margins": 68.64981079101562, "rewards/real": -0.6629735231399536, "step": 3300 }, { "epoch": 2.12, "learning_rate": 1.6338862559241706e-07, "logits/generated": -0.36683911085128784, "logits/real": -0.6102225184440613, "logps/generated": -1144.6790771484375, "logps/real": -150.28924560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.97947692871094, "rewards/margins": 74.34928894042969, "rewards/real": -0.6301820278167725, "step": 3310 }, { "epoch": 2.12, "learning_rate": 1.622037914691943e-07, "logits/generated": -0.3236832916736603, "logits/real": -0.5270904302597046, "logps/generated": -1184.956787109375, "logps/real": -147.99168395996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.87047576904297, "rewards/margins": 77.00102233886719, "rewards/real": -0.8694450259208679, "step": 3320 }, { "epoch": 2.13, "learning_rate": 1.6101895734597156e-07, "logits/generated": -0.373867928981781, "logits/real": -0.5834953784942627, "logps/generated": -1108.7701416015625, "logps/real": -163.93594360351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.46839141845703, "rewards/margins": 69.51911926269531, "rewards/real": -0.9492788314819336, "step": 3330 }, { "epoch": 2.14, "learning_rate": 1.598341232227488e-07, "logits/generated": -0.3623445928096771, "logits/real": -0.5558930039405823, "logps/generated": -1033.65771484375, "logps/real": -145.7788848876953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -63.11626434326172, "rewards/margins": 62.55774688720703, "rewards/real": -0.5585171580314636, "step": 3340 }, { "epoch": 2.14, "learning_rate": 1.5864928909952605e-07, "logits/generated": -0.3441459834575653, "logits/real": -0.5500830411911011, "logps/generated": -1076.542724609375, "logps/real": -142.98782348632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.62294006347656, "rewards/margins": 69.04898071289062, "rewards/real": -0.5739551186561584, "step": 3350 }, { "epoch": 2.15, "learning_rate": 1.5746445497630332e-07, "logits/generated": -0.32716110348701477, "logits/real": -0.5438711047172546, "logps/generated": -1147.177978515625, "logps/real": -145.17544555664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.67416381835938, "rewards/margins": 71.88451385498047, "rewards/real": -0.7896552085876465, "step": 3360 }, { "epoch": 2.16, "learning_rate": 1.5627962085308054e-07, "logits/generated": -0.3573629856109619, "logits/real": -0.5217954516410828, "logps/generated": -1130.4320068359375, "logps/real": -172.75650024414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.90027618408203, "rewards/margins": 71.09027862548828, "rewards/real": -0.8099902868270874, "step": 3370 }, { "epoch": 2.16, "learning_rate": 1.5509478672985782e-07, "logits/generated": -0.34415721893310547, "logits/real": -0.5328197479248047, "logps/generated": -956.2267456054688, "logps/real": -184.76602172851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -57.68220901489258, "rewards/margins": 56.873924255371094, "rewards/real": -0.8082860708236694, "step": 3380 }, { "epoch": 2.17, "learning_rate": 1.5390995260663506e-07, "logits/generated": -0.3490820825099945, "logits/real": -0.5200581550598145, "logps/generated": -1079.313720703125, "logps/real": -158.89695739746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.39738464355469, "rewards/margins": 67.54957580566406, "rewards/real": -0.8478103876113892, "step": 3390 }, { "epoch": 2.18, "learning_rate": 1.5272511848341233e-07, "logits/generated": -0.32425522804260254, "logits/real": -0.5338164567947388, "logps/generated": -1100.94873046875, "logps/real": -160.40975952148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.0157699584961, "rewards/margins": 68.4029312133789, "rewards/real": -0.6128430366516113, "step": 3400 }, { "epoch": 2.18, "learning_rate": 1.5154028436018955e-07, "logits/generated": -0.3767894208431244, "logits/real": -0.5962772965431213, "logps/generated": -1119.630615234375, "logps/real": -164.57717895507812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.62731170654297, "rewards/margins": 69.80793762207031, "rewards/real": -0.8193785548210144, "step": 3410 }, { "epoch": 2.19, "learning_rate": 1.5035545023696683e-07, "logits/generated": -0.3327923119068146, "logits/real": -0.5231298804283142, "logps/generated": -1091.9482421875, "logps/real": -159.0697021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.09928894042969, "rewards/margins": 66.29541015625, "rewards/real": -0.8038908243179321, "step": 3420 }, { "epoch": 2.19, "learning_rate": 1.4917061611374407e-07, "logits/generated": -0.37766391038894653, "logits/real": -0.5465415716171265, "logps/generated": -1140.1834716796875, "logps/real": -176.43777465820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.69719696044922, "rewards/margins": 72.85856628417969, "rewards/real": -0.8386209607124329, "step": 3430 }, { "epoch": 2.2, "learning_rate": 1.4798578199052132e-07, "logits/generated": -0.3288564682006836, "logits/real": -0.6394492983818054, "logps/generated": -1092.682861328125, "logps/real": -138.92721557617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.9786148071289, "rewards/margins": 68.30329895019531, "rewards/real": -0.6753060817718506, "step": 3440 }, { "epoch": 2.21, "learning_rate": 1.4680094786729857e-07, "logits/generated": -0.3352881371974945, "logits/real": -0.5174044370651245, "logps/generated": -1148.893798828125, "logps/real": -157.83375549316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.80946350097656, "rewards/margins": 74.113037109375, "rewards/real": -0.6964321136474609, "step": 3450 }, { "epoch": 2.21, "learning_rate": 1.456161137440758e-07, "logits/generated": -0.38338786363601685, "logits/real": -0.6143153309822083, "logps/generated": -1172.7335205078125, "logps/real": -150.6049041748047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.13297271728516, "rewards/margins": 73.13155364990234, "rewards/real": -1.0014207363128662, "step": 3460 }, { "epoch": 2.22, "learning_rate": 1.4443127962085309e-07, "logits/generated": -0.34384411573410034, "logits/real": -0.585628867149353, "logps/generated": -1103.96923828125, "logps/real": -153.5775604248047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.13743591308594, "rewards/margins": 69.50370025634766, "rewards/real": -0.6337412595748901, "step": 3470 }, { "epoch": 2.23, "learning_rate": 1.4324644549763033e-07, "logits/generated": -0.34684544801712036, "logits/real": -0.6211697459220886, "logps/generated": -1069.9642333984375, "logps/real": -169.10153198242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.88719177246094, "rewards/margins": 67.12832641601562, "rewards/real": -0.7588711380958557, "step": 3480 }, { "epoch": 2.23, "learning_rate": 1.4206161137440758e-07, "logits/generated": -0.40978360176086426, "logits/real": -0.5079740881919861, "logps/generated": -1203.1060791015625, "logps/real": -182.2041778564453, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -81.18981170654297, "rewards/margins": 80.14823913574219, "rewards/real": -1.0415685176849365, "step": 3490 }, { "epoch": 2.24, "learning_rate": 1.4087677725118482e-07, "logits/generated": -0.3429732024669647, "logits/real": -0.6365989446640015, "logps/generated": -1202.0032958984375, "logps/real": -135.09603881835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -79.1153793334961, "rewards/margins": 78.39237976074219, "rewards/real": -0.7229984998703003, "step": 3500 }, { "epoch": 2.25, "learning_rate": 1.396919431279621e-07, "logits/generated": -0.3157784044742584, "logits/real": -0.5285521149635315, "logps/generated": -1134.604248046875, "logps/real": -162.54391479492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.02388763427734, "rewards/margins": 71.22061157226562, "rewards/real": -0.8032848238945007, "step": 3510 }, { "epoch": 2.25, "learning_rate": 1.3850710900473934e-07, "logits/generated": -0.38203898072242737, "logits/real": -0.549940824508667, "logps/generated": -1131.8427734375, "logps/real": -152.9811248779297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.25450897216797, "rewards/margins": 71.60226440429688, "rewards/real": -0.6522516012191772, "step": 3520 }, { "epoch": 2.26, "learning_rate": 1.3732227488151656e-07, "logits/generated": -0.3488486707210541, "logits/real": -0.39327472448349, "logps/generated": -1084.085205078125, "logps/real": -202.0915985107422, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.83716583251953, "rewards/margins": 67.02722930908203, "rewards/real": -0.8099360466003418, "step": 3530 }, { "epoch": 2.26, "learning_rate": 1.3613744075829384e-07, "logits/generated": -0.29847994446754456, "logits/real": -0.5526587963104248, "logps/generated": -1222.8707275390625, "logps/real": -151.45326232910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -84.52772521972656, "rewards/margins": 83.77813720703125, "rewards/real": -0.7495924234390259, "step": 3540 }, { "epoch": 2.27, "learning_rate": 1.3495260663507108e-07, "logits/generated": -0.3559364676475525, "logits/real": -0.5464509725570679, "logps/generated": -1149.019775390625, "logps/real": -144.06759643554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.87704467773438, "rewards/margins": 72.91664123535156, "rewards/real": -0.9604147672653198, "step": 3550 }, { "epoch": 2.28, "learning_rate": 1.3376777251184836e-07, "logits/generated": -0.3057738244533539, "logits/real": -0.5685640573501587, "logps/generated": -1133.9283447265625, "logps/real": -161.63894653320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.35575103759766, "rewards/margins": 73.46749114990234, "rewards/real": -0.8882623910903931, "step": 3560 }, { "epoch": 2.28, "learning_rate": 1.3258293838862558e-07, "logits/generated": -0.3922407031059265, "logits/real": -0.6674994230270386, "logps/generated": -1132.536865234375, "logps/real": -146.58091735839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.54347229003906, "rewards/margins": 70.5415267944336, "rewards/real": -1.0019347667694092, "step": 3570 }, { "epoch": 2.29, "learning_rate": 1.3139810426540285e-07, "logits/generated": -0.33480846881866455, "logits/real": -0.5054049491882324, "logps/generated": -1200.821044921875, "logps/real": -152.25894165039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.86302185058594, "rewards/margins": 77.09458923339844, "rewards/real": -0.7684418559074402, "step": 3580 }, { "epoch": 2.3, "learning_rate": 1.302132701421801e-07, "logits/generated": -0.2758890986442566, "logits/real": -0.5493889451026917, "logps/generated": -1093.770751953125, "logps/real": -157.31973266601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.96113586425781, "rewards/margins": 69.14842987060547, "rewards/real": -0.8127008676528931, "step": 3590 }, { "epoch": 2.3, "learning_rate": 1.2902843601895734e-07, "logits/generated": -0.3500101566314697, "logits/real": -0.5593122243881226, "logps/generated": -1206.6497802734375, "logps/real": -159.24282836914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -82.6231689453125, "rewards/margins": 81.7955551147461, "rewards/real": -0.8276035189628601, "step": 3600 }, { "epoch": 2.31, "learning_rate": 1.278436018957346e-07, "logits/generated": -0.3821162283420563, "logits/real": -0.6203012466430664, "logps/generated": -1269.4385986328125, "logps/real": -147.83021545410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -88.19657897949219, "rewards/margins": 87.15613555908203, "rewards/real": -1.0404458045959473, "step": 3610 }, { "epoch": 2.32, "learning_rate": 1.2665876777251183e-07, "logits/generated": -0.4196929931640625, "logits/real": -0.6896005868911743, "logps/generated": -1157.632568359375, "logps/real": -161.3779754638672, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.62742614746094, "rewards/margins": 74.22064208984375, "rewards/real": -0.40678733587265015, "step": 3620 }, { "epoch": 2.32, "learning_rate": 1.254739336492891e-07, "logits/generated": -0.46330398321151733, "logits/real": -0.6500687003135681, "logps/generated": -1221.859130859375, "logps/real": -170.46438598632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -80.08015441894531, "rewards/margins": 79.51933288574219, "rewards/real": -0.5608130693435669, "step": 3630 }, { "epoch": 2.33, "learning_rate": 1.2428909952606635e-07, "logits/generated": -0.4650397300720215, "logits/real": -0.6424258351325989, "logps/generated": -1196.016357421875, "logps/real": -148.45826721191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.94972229003906, "rewards/margins": 76.64620208740234, "rewards/real": -0.3035140633583069, "step": 3640 }, { "epoch": 2.34, "learning_rate": 1.231042654028436e-07, "logits/generated": -0.4637749195098877, "logits/real": -0.6913520097732544, "logps/generated": -1081.8062744140625, "logps/real": -143.20822143554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.7381820678711, "rewards/margins": 68.34100341796875, "rewards/real": -0.39719128608703613, "step": 3650 }, { "epoch": 2.34, "learning_rate": 1.2191943127962085e-07, "logits/generated": -0.4392772614955902, "logits/real": -0.5955749750137329, "logps/generated": -1123.980712890625, "logps/real": -200.45895385742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.12271881103516, "rewards/margins": 71.51152038574219, "rewards/real": -0.6112003326416016, "step": 3660 }, { "epoch": 2.35, "learning_rate": 1.207345971563981e-07, "logits/generated": -0.43859052658081055, "logits/real": -0.5733956098556519, "logps/generated": -1162.6119384765625, "logps/real": -168.77096557617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.10337829589844, "rewards/margins": 76.67765045166016, "rewards/real": -0.42572155594825745, "step": 3670 }, { "epoch": 2.35, "learning_rate": 1.1954976303317534e-07, "logits/generated": -0.47363168001174927, "logits/real": -0.6165460348129272, "logps/generated": -1090.505859375, "logps/real": -164.0859832763672, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.29020690917969, "rewards/margins": 69.48956298828125, "rewards/real": -0.8006424903869629, "step": 3680 }, { "epoch": 2.36, "learning_rate": 1.183649289099526e-07, "logits/generated": -0.41170722246170044, "logits/real": -0.6370391249656677, "logps/generated": -1108.5679931640625, "logps/real": -160.58328247070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.86441040039062, "rewards/margins": 71.31538391113281, "rewards/real": -0.549027681350708, "step": 3690 }, { "epoch": 2.37, "learning_rate": 1.1718009478672986e-07, "logits/generated": -0.379972368478775, "logits/real": -0.6687403321266174, "logps/generated": -1153.8831787109375, "logps/real": -136.22607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.22931671142578, "rewards/margins": 74.69772338867188, "rewards/real": -0.5315843820571899, "step": 3700 }, { "epoch": 2.37, "learning_rate": 1.159952606635071e-07, "logits/generated": -0.39145171642303467, "logits/real": -0.5640333294868469, "logps/generated": -1174.184326171875, "logps/real": -156.5506591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.92201232910156, "rewards/margins": 76.40318298339844, "rewards/real": -0.5188380479812622, "step": 3710 }, { "epoch": 2.38, "learning_rate": 1.1481042654028436e-07, "logits/generated": -0.4219874441623688, "logits/real": -0.6748846769332886, "logps/generated": -1189.40771484375, "logps/real": -150.1600799560547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.53298950195312, "rewards/margins": 76.97785949707031, "rewards/real": -0.5551234483718872, "step": 3720 }, { "epoch": 2.39, "learning_rate": 1.136255924170616e-07, "logits/generated": -0.40171951055526733, "logits/real": -0.5924757122993469, "logps/generated": -1085.5384521484375, "logps/real": -151.51275634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.87525177001953, "rewards/margins": 69.43013763427734, "rewards/real": -0.4451100826263428, "step": 3730 }, { "epoch": 2.39, "learning_rate": 1.1244075829383886e-07, "logits/generated": -0.46468549966812134, "logits/real": -0.6485167145729065, "logps/generated": -1184.935546875, "logps/real": -141.2152099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.06682586669922, "rewards/margins": 76.591064453125, "rewards/real": -0.47575807571411133, "step": 3740 }, { "epoch": 2.4, "learning_rate": 1.112559241706161e-07, "logits/generated": -0.4084410071372986, "logits/real": -0.5791139602661133, "logps/generated": -1158.128173828125, "logps/real": -140.56553649902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.33256530761719, "rewards/margins": 73.85154724121094, "rewards/real": -0.48102912306785583, "step": 3750 }, { "epoch": 2.41, "learning_rate": 1.1007109004739336e-07, "logits/generated": -0.3835846781730652, "logits/real": -0.6427528262138367, "logps/generated": -1111.537109375, "logps/real": -120.40766906738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.03950500488281, "rewards/margins": 72.73836517333984, "rewards/real": -0.3011349141597748, "step": 3760 }, { "epoch": 2.41, "learning_rate": 1.0888625592417061e-07, "logits/generated": -0.38520628213882446, "logits/real": -0.5476213693618774, "logps/generated": -1175.740966796875, "logps/real": -150.56874084472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.73905944824219, "rewards/margins": 76.24748992919922, "rewards/real": -0.4915708601474762, "step": 3770 }, { "epoch": 2.42, "learning_rate": 1.0770142180094787e-07, "logits/generated": -0.4786599576473236, "logits/real": -0.6190992593765259, "logps/generated": -1177.5657958984375, "logps/real": -171.1073455810547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.37752532958984, "rewards/margins": 74.78016662597656, "rewards/real": -0.5973631143569946, "step": 3780 }, { "epoch": 2.42, "learning_rate": 1.0651658767772511e-07, "logits/generated": -0.4746522307395935, "logits/real": -0.6191864609718323, "logps/generated": -1131.2122802734375, "logps/real": -170.5677490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.63467407226562, "rewards/margins": 70.15538024902344, "rewards/real": -0.4793027341365814, "step": 3790 }, { "epoch": 2.43, "learning_rate": 1.0533175355450237e-07, "logits/generated": -0.4551950991153717, "logits/real": -0.6525193452835083, "logps/generated": -1122.6358642578125, "logps/real": -159.02659606933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.77278137207031, "rewards/margins": 70.19547271728516, "rewards/real": -0.5773108005523682, "step": 3800 }, { "epoch": 2.44, "learning_rate": 1.0414691943127962e-07, "logits/generated": -0.4288361668586731, "logits/real": -0.6204260587692261, "logps/generated": -1192.144775390625, "logps/real": -163.85079956054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.51756286621094, "rewards/margins": 74.76011657714844, "rewards/real": -0.7574476003646851, "step": 3810 }, { "epoch": 2.44, "learning_rate": 1.0296208530805687e-07, "logits/generated": -0.4227616786956787, "logits/real": -0.5756683945655823, "logps/generated": -1093.92529296875, "logps/real": -168.95413208007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.02093505859375, "rewards/margins": 66.26923370361328, "rewards/real": -0.7517085671424866, "step": 3820 }, { "epoch": 2.45, "learning_rate": 1.0177725118483411e-07, "logits/generated": -0.400717556476593, "logits/real": -0.5438157320022583, "logps/generated": -1066.676025390625, "logps/real": -157.6385040283203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -66.5630874633789, "rewards/margins": 65.72784423828125, "rewards/real": -0.8352400064468384, "step": 3830 }, { "epoch": 2.46, "learning_rate": 1.0059241706161137e-07, "logits/generated": -0.394951194524765, "logits/real": -0.5915923118591309, "logps/generated": -1111.5635986328125, "logps/real": -151.64712524414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.04698181152344, "rewards/margins": 70.56211853027344, "rewards/real": -0.4848620295524597, "step": 3840 }, { "epoch": 2.46, "learning_rate": 9.940758293838862e-08, "logits/generated": -0.3858771026134491, "logits/real": -0.6399182081222534, "logps/generated": -1137.333740234375, "logps/real": -133.06719970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.79722595214844, "rewards/margins": 71.40068054199219, "rewards/real": -0.3965340256690979, "step": 3850 }, { "epoch": 2.47, "learning_rate": 9.822274881516588e-08, "logits/generated": -0.40112823247909546, "logits/real": -0.6340306401252747, "logps/generated": -1152.3155517578125, "logps/real": -146.1147918701172, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.04869079589844, "rewards/margins": 74.36548614501953, "rewards/real": -0.6832191944122314, "step": 3860 }, { "epoch": 2.48, "learning_rate": 9.703791469194312e-08, "logits/generated": -0.3981134295463562, "logits/real": -0.615269660949707, "logps/generated": -1170.2542724609375, "logps/real": -125.6851577758789, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.76252746582031, "rewards/margins": 77.17864990234375, "rewards/real": -0.5838753581047058, "step": 3870 }, { "epoch": 2.48, "learning_rate": 9.585308056872038e-08, "logits/generated": -0.44058480858802795, "logits/real": -0.5800412893295288, "logps/generated": -1036.949951171875, "logps/real": -154.7644500732422, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.33032989501953, "rewards/margins": 64.915283203125, "rewards/real": -0.41504526138305664, "step": 3880 }, { "epoch": 2.49, "learning_rate": 9.466824644549763e-08, "logits/generated": -0.4272391200065613, "logits/real": -0.5816367864608765, "logps/generated": -1235.927734375, "logps/real": -135.92108154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -83.16920471191406, "rewards/margins": 82.63643646240234, "rewards/real": -0.5327636003494263, "step": 3890 }, { "epoch": 2.5, "learning_rate": 9.348341232227488e-08, "logits/generated": -0.45391201972961426, "logits/real": -0.5931220054626465, "logps/generated": -1152.2362060546875, "logps/real": -149.9822540283203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.86837768554688, "rewards/margins": 74.22323608398438, "rewards/real": -0.6451278924942017, "step": 3900 }, { "epoch": 2.5, "learning_rate": 9.229857819905212e-08, "logits/generated": -0.42041435837745667, "logits/real": -0.5749475359916687, "logps/generated": -1185.0155029296875, "logps/real": -165.84779357910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.07542419433594, "rewards/margins": 75.43386840820312, "rewards/real": -0.6415479183197021, "step": 3910 }, { "epoch": 2.51, "learning_rate": 9.111374407582938e-08, "logits/generated": -0.3552473187446594, "logits/real": -0.5435065031051636, "logps/generated": -1080.6820068359375, "logps/real": -149.6726837158203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.79393005371094, "rewards/margins": 67.18496704101562, "rewards/real": -0.6089592576026917, "step": 3920 }, { "epoch": 2.51, "learning_rate": 8.992890995260663e-08, "logits/generated": -0.36526188254356384, "logits/real": -0.542805552482605, "logps/generated": -1186.941650390625, "logps/real": -147.6852264404297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.81428527832031, "rewards/margins": 76.37767791748047, "rewards/real": -0.4366043210029602, "step": 3930 }, { "epoch": 2.52, "learning_rate": 8.874407582938389e-08, "logits/generated": -0.38974112272262573, "logits/real": -0.6490163207054138, "logps/generated": -1125.834716796875, "logps/real": -153.83132934570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.05818176269531, "rewards/margins": 69.58064270019531, "rewards/real": -0.4775339961051941, "step": 3940 }, { "epoch": 2.53, "learning_rate": 8.755924170616114e-08, "logits/generated": -0.38700738549232483, "logits/real": -0.5742595195770264, "logps/generated": -1097.8369140625, "logps/real": -169.0182342529297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.09275817871094, "rewards/margins": 68.44459533691406, "rewards/real": -0.648157000541687, "step": 3950 }, { "epoch": 2.53, "learning_rate": 8.63744075829384e-08, "logits/generated": -0.3959638476371765, "logits/real": -0.6299315690994263, "logps/generated": -1148.005859375, "logps/real": -154.40768432617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.98236846923828, "rewards/margins": 74.33345794677734, "rewards/real": -0.6489164233207703, "step": 3960 }, { "epoch": 2.54, "learning_rate": 8.518957345971564e-08, "logits/generated": -0.3766046166419983, "logits/real": -0.5962399244308472, "logps/generated": -1100.2664794921875, "logps/real": -153.56520080566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.08140563964844, "rewards/margins": 68.45283508300781, "rewards/real": -0.6285830736160278, "step": 3970 }, { "epoch": 2.55, "learning_rate": 8.40047393364929e-08, "logits/generated": -0.41984719038009644, "logits/real": -0.5362011790275574, "logps/generated": -1218.0125732421875, "logps/real": -143.78208923339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -80.89471435546875, "rewards/margins": 80.50052642822266, "rewards/real": -0.3941938281059265, "step": 3980 }, { "epoch": 2.55, "learning_rate": 8.281990521327013e-08, "logits/generated": -0.38275301456451416, "logits/real": -0.6942519545555115, "logps/generated": -1104.48828125, "logps/real": -130.95724487304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.27180480957031, "rewards/margins": 68.72615051269531, "rewards/real": -0.5456700921058655, "step": 3990 }, { "epoch": 2.56, "learning_rate": 8.163507109004738e-08, "logits/generated": -0.369687020778656, "logits/real": -0.594490110874176, "logps/generated": -1186.750732421875, "logps/real": -140.19932556152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.59791564941406, "rewards/margins": 76.15618133544922, "rewards/real": -0.44173464179039, "step": 4000 }, { "epoch": 2.57, "learning_rate": 8.045023696682464e-08, "logits/generated": -0.4018821120262146, "logits/real": -0.6110813617706299, "logps/generated": -1141.607177734375, "logps/real": -150.42648315429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.14505767822266, "rewards/margins": 72.56416320800781, "rewards/real": -0.5808922052383423, "step": 4010 }, { "epoch": 2.57, "learning_rate": 7.926540284360189e-08, "logits/generated": -0.3948236405849457, "logits/real": -0.6121063232421875, "logps/generated": -1248.185302734375, "logps/real": -166.86651611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -84.28129577636719, "rewards/margins": 83.68611907958984, "rewards/real": -0.5951663851737976, "step": 4020 }, { "epoch": 2.58, "learning_rate": 7.808056872037915e-08, "logits/generated": -0.40597429871559143, "logits/real": -0.6436026692390442, "logps/generated": -1255.7674560546875, "logps/real": -145.66281127929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -84.02607727050781, "rewards/margins": 83.40240478515625, "rewards/real": -0.6236714124679565, "step": 4030 }, { "epoch": 2.58, "learning_rate": 7.689573459715639e-08, "logits/generated": -0.42012372612953186, "logits/real": -0.5629431009292603, "logps/generated": -1173.372314453125, "logps/real": -168.88525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -79.46331024169922, "rewards/margins": 78.79911804199219, "rewards/real": -0.6641994714736938, "step": 4040 }, { "epoch": 2.59, "learning_rate": 7.571090047393365e-08, "logits/generated": -0.41295844316482544, "logits/real": -0.6122807860374451, "logps/generated": -1108.602783203125, "logps/real": -137.6436767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.95465850830078, "rewards/margins": 70.28472900390625, "rewards/real": -0.6699261665344238, "step": 4050 }, { "epoch": 2.6, "learning_rate": 7.45260663507109e-08, "logits/generated": -0.4057750105857849, "logits/real": -0.6003803610801697, "logps/generated": -1285.003662109375, "logps/real": -139.14450073242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -89.37120056152344, "rewards/margins": 88.8534927368164, "rewards/real": -0.5177055597305298, "step": 4060 }, { "epoch": 2.6, "learning_rate": 7.334123222748814e-08, "logits/generated": -0.3600274324417114, "logits/real": -0.5848634243011475, "logps/generated": -1176.391357421875, "logps/real": -151.9239959716797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.63912200927734, "rewards/margins": 76.92236328125, "rewards/real": -0.716761589050293, "step": 4070 }, { "epoch": 2.61, "learning_rate": 7.215639810426539e-08, "logits/generated": -0.40445417165756226, "logits/real": -0.6599612832069397, "logps/generated": -1055.158203125, "logps/real": -162.99879455566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.7494888305664, "rewards/margins": 66.99449157714844, "rewards/real": -0.7549879550933838, "step": 4080 }, { "epoch": 2.62, "learning_rate": 7.097156398104265e-08, "logits/generated": -0.368557870388031, "logits/real": -0.47544288635253906, "logps/generated": -1161.69970703125, "logps/real": -145.6846923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.03729248046875, "rewards/margins": 74.46896362304688, "rewards/real": -0.5683245062828064, "step": 4090 }, { "epoch": 2.62, "learning_rate": 6.97867298578199e-08, "logits/generated": -0.4004407823085785, "logits/real": -0.5904231667518616, "logps/generated": -1147.255126953125, "logps/real": -144.21981811523438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.1914291381836, "rewards/margins": 73.42829895019531, "rewards/real": -0.7631380558013916, "step": 4100 }, { "epoch": 2.63, "learning_rate": 6.860189573459716e-08, "logits/generated": -0.42303165793418884, "logits/real": -0.6458145380020142, "logps/generated": -1187.7353515625, "logps/real": -157.00314331054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.56205749511719, "rewards/margins": 76.46504211425781, "rewards/real": -1.0970159769058228, "step": 4110 }, { "epoch": 2.64, "learning_rate": 6.74170616113744e-08, "logits/generated": -0.430379718542099, "logits/real": -0.595691442489624, "logps/generated": -1177.614990234375, "logps/real": -149.47544860839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -78.75337219238281, "rewards/margins": 78.08720397949219, "rewards/real": -0.666162371635437, "step": 4120 }, { "epoch": 2.64, "learning_rate": 6.623222748815166e-08, "logits/generated": -0.3575670123100281, "logits/real": -0.5881283283233643, "logps/generated": -1064.1312255859375, "logps/real": -139.80990600585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -68.53370666503906, "rewards/margins": 67.87602996826172, "rewards/real": -0.6576740741729736, "step": 4130 }, { "epoch": 2.65, "learning_rate": 6.504739336492891e-08, "logits/generated": -0.37114548683166504, "logits/real": -0.4665864408016205, "logps/generated": -1096.2255859375, "logps/real": -132.2154083251953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.34225463867188, "rewards/margins": 70.71492004394531, "rewards/real": -0.6273313164710999, "step": 4140 }, { "epoch": 2.66, "learning_rate": 6.386255924170615e-08, "logits/generated": -0.36751076579093933, "logits/real": -0.5819805264472961, "logps/generated": -1186.3887939453125, "logps/real": -146.05813598632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -78.0079116821289, "rewards/margins": 77.3697280883789, "rewards/real": -0.6381850838661194, "step": 4150 }, { "epoch": 2.66, "learning_rate": 6.26777251184834e-08, "logits/generated": -0.414253294467926, "logits/real": -0.6853745579719543, "logps/generated": -1162.65234375, "logps/real": -144.77853393554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.9441146850586, "rewards/margins": 74.01307678222656, "rewards/real": -0.931043267250061, "step": 4160 }, { "epoch": 2.67, "learning_rate": 6.149289099526066e-08, "logits/generated": -0.42148175835609436, "logits/real": -0.6784361600875854, "logps/generated": -1106.80078125, "logps/real": -149.71484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.97175598144531, "rewards/margins": 70.25888061523438, "rewards/real": -0.7128777503967285, "step": 4170 }, { "epoch": 2.67, "learning_rate": 6.030805687203791e-08, "logits/generated": -0.43376749753952026, "logits/real": -0.555932879447937, "logps/generated": -1215.3369140625, "logps/real": -154.15284729003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -78.09486389160156, "rewards/margins": 77.44525146484375, "rewards/real": -0.6496086716651917, "step": 4180 }, { "epoch": 2.68, "learning_rate": 5.912322274881516e-08, "logits/generated": -0.4182409346103668, "logits/real": -0.5983696579933167, "logps/generated": -1234.3037109375, "logps/real": -168.16717529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -84.26101684570312, "rewards/margins": 83.48126983642578, "rewards/real": -0.7797611951828003, "step": 4190 }, { "epoch": 2.69, "learning_rate": 5.793838862559241e-08, "logits/generated": -0.445539653301239, "logits/real": -0.670427680015564, "logps/generated": -1296.377685546875, "logps/real": -134.7132568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -88.41688537597656, "rewards/margins": 87.923095703125, "rewards/real": -0.49379315972328186, "step": 4200 }, { "epoch": 2.69, "learning_rate": 5.6753554502369666e-08, "logits/generated": -0.3767016530036926, "logits/real": -0.5589041709899902, "logps/generated": -1065.5745849609375, "logps/real": -145.93048095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -64.40142059326172, "rewards/margins": 63.82404708862305, "rewards/real": -0.577372133731842, "step": 4210 }, { "epoch": 2.7, "learning_rate": 5.556872037914691e-08, "logits/generated": -0.41733822226524353, "logits/real": -0.6436376571655273, "logps/generated": -1053.798095703125, "logps/real": -165.2022247314453, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.1214828491211, "rewards/margins": 64.43299865722656, "rewards/real": -0.6884856820106506, "step": 4220 }, { "epoch": 2.71, "learning_rate": 5.4383886255924165e-08, "logits/generated": -0.4003145098686218, "logits/real": -0.5307371020317078, "logps/generated": -1084.7730712890625, "logps/real": -168.169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.28605651855469, "rewards/margins": 66.21138763427734, "rewards/real": -1.0746623277664185, "step": 4230 }, { "epoch": 2.71, "learning_rate": 5.319905213270142e-08, "logits/generated": -0.4704248011112213, "logits/real": -0.6616953611373901, "logps/generated": -1222.5751953125, "logps/real": -148.44683837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -79.704345703125, "rewards/margins": 79.09273529052734, "rewards/real": -0.6116179823875427, "step": 4240 }, { "epoch": 2.72, "learning_rate": 5.201421800947867e-08, "logits/generated": -0.4243236482143402, "logits/real": -0.6298291683197021, "logps/generated": -1416.729736328125, "logps/real": -142.0489959716797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -100.81539154052734, "rewards/margins": 100.10444641113281, "rewards/real": -0.710952639579773, "step": 4250 }, { "epoch": 2.73, "learning_rate": 5.082938388625592e-08, "logits/generated": -0.4102560579776764, "logits/real": -0.5691145658493042, "logps/generated": -1115.2730712890625, "logps/real": -159.29409790039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -70.8335952758789, "rewards/margins": 70.12803649902344, "rewards/real": -0.7055586576461792, "step": 4260 }, { "epoch": 2.73, "learning_rate": 4.964454976303317e-08, "logits/generated": -0.4029023051261902, "logits/real": -0.6073136329650879, "logps/generated": -1238.347900390625, "logps/real": -143.02938842773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -82.9701156616211, "rewards/margins": 82.27519226074219, "rewards/real": -0.6949158906936646, "step": 4270 }, { "epoch": 2.74, "learning_rate": 4.845971563981042e-08, "logits/generated": -0.42402610182762146, "logits/real": -0.6584951877593994, "logps/generated": -1118.9874267578125, "logps/real": -145.93138122558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.63047790527344, "rewards/margins": 72.98558044433594, "rewards/real": -0.644890546798706, "step": 4280 }, { "epoch": 2.74, "learning_rate": 4.7274881516587676e-08, "logits/generated": -0.3934037685394287, "logits/real": -0.6283634901046753, "logps/generated": -1259.203369140625, "logps/real": -153.5712432861328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -86.28514099121094, "rewards/margins": 85.61091613769531, "rewards/real": -0.6742227673530579, "step": 4290 }, { "epoch": 2.75, "learning_rate": 4.609004739336492e-08, "logits/generated": -0.4452149271965027, "logits/real": -0.562778115272522, "logps/generated": -1178.203369140625, "logps/real": -167.87570190429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.86197662353516, "rewards/margins": 76.29110717773438, "rewards/real": -0.5708707571029663, "step": 4300 }, { "epoch": 2.76, "learning_rate": 4.4905213270142176e-08, "logits/generated": -0.46924668550491333, "logits/real": -0.5640957355499268, "logps/generated": -1178.0283203125, "logps/real": -152.62841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.18916320800781, "rewards/margins": 74.46082305908203, "rewards/real": -0.7283350229263306, "step": 4310 }, { "epoch": 2.76, "learning_rate": 4.372037914691943e-08, "logits/generated": -0.4569918215274811, "logits/real": -0.6253639459609985, "logps/generated": -1141.238525390625, "logps/real": -159.03335571289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.83621215820312, "rewards/margins": 72.23777770996094, "rewards/real": -0.5984372496604919, "step": 4320 }, { "epoch": 2.77, "learning_rate": 4.253554502369668e-08, "logits/generated": -0.3836295008659363, "logits/real": -0.5883413553237915, "logps/generated": -1028.1627197265625, "logps/real": -152.56336975097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.914405822753906, "rewards/margins": 62.3019905090332, "rewards/real": -0.6124156713485718, "step": 4330 }, { "epoch": 2.78, "learning_rate": 4.135071090047393e-08, "logits/generated": -0.43586069345474243, "logits/real": -0.584862232208252, "logps/generated": -1256.212646484375, "logps/real": -182.5783233642578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -84.49156951904297, "rewards/margins": 83.60160064697266, "rewards/real": -0.8899722099304199, "step": 4340 }, { "epoch": 2.78, "learning_rate": 4.016587677725118e-08, "logits/generated": -0.42249807715415955, "logits/real": -0.6765289902687073, "logps/generated": -1098.554443359375, "logps/real": -149.42665100097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.22462463378906, "rewards/margins": 68.48421478271484, "rewards/real": -0.740403950214386, "step": 4350 }, { "epoch": 2.79, "learning_rate": 3.8981042654028434e-08, "logits/generated": -0.3722071051597595, "logits/real": -0.6433338522911072, "logps/generated": -1152.187255859375, "logps/real": -135.0558624267578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.83736419677734, "rewards/margins": 71.99942016601562, "rewards/real": -0.8379424810409546, "step": 4360 }, { "epoch": 2.8, "learning_rate": 3.779620853080569e-08, "logits/generated": -0.4465080797672272, "logits/real": -0.6853441596031189, "logps/generated": -1219.987548828125, "logps/real": -149.74484252929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -81.68904876708984, "rewards/margins": 81.03762817382812, "rewards/real": -0.6514285802841187, "step": 4370 }, { "epoch": 2.8, "learning_rate": 3.661137440758294e-08, "logits/generated": -0.39111563563346863, "logits/real": -0.62447589635849, "logps/generated": -1110.0286865234375, "logps/real": -130.0288543701172, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.36956787109375, "rewards/margins": 72.62205505371094, "rewards/real": -0.7475109696388245, "step": 4380 }, { "epoch": 2.81, "learning_rate": 3.5426540284360186e-08, "logits/generated": -0.3793131709098816, "logits/real": -0.6151835322380066, "logps/generated": -1089.3216552734375, "logps/real": -138.77415466308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -67.62622833251953, "rewards/margins": 67.07869720458984, "rewards/real": -0.5475287437438965, "step": 4390 }, { "epoch": 2.82, "learning_rate": 3.424170616113744e-08, "logits/generated": -0.41481703519821167, "logits/real": -0.6037416458129883, "logps/generated": -1118.58349609375, "logps/real": -156.24639892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.81546783447266, "rewards/margins": 71.1466293334961, "rewards/real": -0.6688372492790222, "step": 4400 }, { "epoch": 2.82, "learning_rate": 3.305687203791469e-08, "logits/generated": -0.38786306977272034, "logits/real": -0.6748972535133362, "logps/generated": -1132.7213134765625, "logps/real": -165.26429748535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.00263977050781, "rewards/margins": 73.53857421875, "rewards/real": -0.4640537202358246, "step": 4410 }, { "epoch": 2.83, "learning_rate": 3.1872037914691945e-08, "logits/generated": -0.3895108699798584, "logits/real": -0.5759503245353699, "logps/generated": -1006.7896728515625, "logps/real": -157.9845733642578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -62.23859786987305, "rewards/margins": 61.38972091674805, "rewards/real": -0.8488828539848328, "step": 4420 }, { "epoch": 2.83, "learning_rate": 3.068720379146919e-08, "logits/generated": -0.3578011691570282, "logits/real": -0.5436447262763977, "logps/generated": -1215.705078125, "logps/real": -147.42477416992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -80.29849243164062, "rewards/margins": 79.59217071533203, "rewards/real": -0.7063143253326416, "step": 4430 }, { "epoch": 2.84, "learning_rate": 2.9502369668246444e-08, "logits/generated": -0.41877445578575134, "logits/real": -0.5616232752799988, "logps/generated": -1106.929931640625, "logps/real": -175.37808227539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.43008422851562, "rewards/margins": 70.78630065917969, "rewards/real": -0.6437833905220032, "step": 4440 }, { "epoch": 2.85, "learning_rate": 2.8317535545023697e-08, "logits/generated": -0.42272821068763733, "logits/real": -0.6309406161308289, "logps/generated": -1138.3070068359375, "logps/real": -144.12118530273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.61137390136719, "rewards/margins": 72.8843994140625, "rewards/real": -0.7269810438156128, "step": 4450 }, { "epoch": 2.85, "learning_rate": 2.7132701421800947e-08, "logits/generated": -0.37885454297065735, "logits/real": -0.6365025043487549, "logps/generated": -1212.714111328125, "logps/real": -144.41900634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -78.62388610839844, "rewards/margins": 77.66529083251953, "rewards/real": -0.9586065411567688, "step": 4460 }, { "epoch": 2.86, "learning_rate": 2.59478672985782e-08, "logits/generated": -0.4420732855796814, "logits/real": -0.5323609113693237, "logps/generated": -1076.71533203125, "logps/real": -181.2960968017578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.17323303222656, "rewards/margins": 64.45045471191406, "rewards/real": -0.7227771878242493, "step": 4470 }, { "epoch": 2.87, "learning_rate": 2.476303317535545e-08, "logits/generated": -0.38568204641342163, "logits/real": -0.5752898454666138, "logps/generated": -1061.8555908203125, "logps/real": -170.11355590820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -66.07365417480469, "rewards/margins": 65.37210845947266, "rewards/real": -0.7015471458435059, "step": 4480 }, { "epoch": 2.87, "learning_rate": 2.3578199052132702e-08, "logits/generated": -0.4010697901248932, "logits/real": -0.5733628273010254, "logps/generated": -1116.184326171875, "logps/real": -161.65103149414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -71.5921630859375, "rewards/margins": 71.0265121459961, "rewards/real": -0.5656577944755554, "step": 4490 }, { "epoch": 2.88, "learning_rate": 2.239336492890995e-08, "logits/generated": -0.4115443229675293, "logits/real": -0.5424914360046387, "logps/generated": -1180.7628173828125, "logps/real": -185.47634887695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.3253402709961, "rewards/margins": 76.40950012207031, "rewards/real": -0.9158375859260559, "step": 4500 }, { "epoch": 2.89, "learning_rate": 2.1208530805687202e-08, "logits/generated": -0.42339619994163513, "logits/real": -0.6375981569290161, "logps/generated": -1170.9703369140625, "logps/real": -131.23190307617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.03814697265625, "rewards/margins": 76.52554321289062, "rewards/real": -0.5126058459281921, "step": 4510 }, { "epoch": 2.89, "learning_rate": 2.002369668246445e-08, "logits/generated": -0.42008423805236816, "logits/real": -0.6603757739067078, "logps/generated": -1106.7174072265625, "logps/real": -132.1379852294922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.35577392578125, "rewards/margins": 68.54685974121094, "rewards/real": -0.8089267611503601, "step": 4520 }, { "epoch": 2.9, "learning_rate": 1.8838862559241704e-08, "logits/generated": -0.4159209132194519, "logits/real": -0.6070636510848999, "logps/generated": -1108.8184814453125, "logps/real": -174.39251708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.12606811523438, "rewards/margins": 68.53681945800781, "rewards/real": -0.5892479419708252, "step": 4530 }, { "epoch": 2.9, "learning_rate": 1.7654028436018954e-08, "logits/generated": -0.41173315048217773, "logits/real": -0.48643770813941956, "logps/generated": -1070.396728515625, "logps/real": -191.44134521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -65.99903869628906, "rewards/margins": 65.2906265258789, "rewards/real": -0.7084180116653442, "step": 4540 }, { "epoch": 2.91, "learning_rate": 1.6469194312796207e-08, "logits/generated": -0.3698303997516632, "logits/real": -0.6320183277130127, "logps/generated": -1076.8929443359375, "logps/real": -139.63314819335938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -66.975830078125, "rewards/margins": 66.27364349365234, "rewards/real": -0.7021877765655518, "step": 4550 }, { "epoch": 2.92, "learning_rate": 1.528436018957346e-08, "logits/generated": -0.3886292278766632, "logits/real": -0.615139365196228, "logps/generated": -1140.2864990234375, "logps/real": -145.85720825195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.76911163330078, "rewards/margins": 74.237060546875, "rewards/real": -0.5320545434951782, "step": 4560 }, { "epoch": 2.92, "learning_rate": 1.409952606635071e-08, "logits/generated": -0.4748724400997162, "logits/real": -0.6143825650215149, "logps/generated": -1171.03076171875, "logps/real": -139.5336151123047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -72.67819213867188, "rewards/margins": 72.13270568847656, "rewards/real": -0.5454872846603394, "step": 4570 }, { "epoch": 2.93, "learning_rate": 1.2914691943127961e-08, "logits/generated": -0.39133062958717346, "logits/real": -0.6907501816749573, "logps/generated": -1139.4451904296875, "logps/real": -122.94720458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -73.89988708496094, "rewards/margins": 73.2684097290039, "rewards/real": -0.6314736604690552, "step": 4580 }, { "epoch": 2.94, "learning_rate": 1.1729857819905212e-08, "logits/generated": -0.45327988266944885, "logits/real": -0.6570634245872498, "logps/generated": -1196.7115478515625, "logps/real": -150.64051818847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.87831115722656, "rewards/margins": 77.22245788574219, "rewards/real": -0.6558529138565063, "step": 4590 }, { "epoch": 2.94, "learning_rate": 1.0545023696682464e-08, "logits/generated": -0.43308648467063904, "logits/real": -0.6817704439163208, "logps/generated": -1163.740966796875, "logps/real": -139.56373596191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -76.80686950683594, "rewards/margins": 76.22025299072266, "rewards/real": -0.5866076350212097, "step": 4600 }, { "epoch": 2.95, "learning_rate": 9.360189573459715e-09, "logits/generated": -0.38277140259742737, "logits/real": -0.6559278964996338, "logps/generated": -1152.90625, "logps/real": -166.77056884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.07879638671875, "rewards/margins": 73.38710021972656, "rewards/real": -0.6916946172714233, "step": 4610 }, { "epoch": 2.96, "learning_rate": 8.175355450236966e-09, "logits/generated": -0.44269418716430664, "logits/real": -0.6459895968437195, "logps/generated": -1183.922607421875, "logps/real": -158.46505737304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -77.86439514160156, "rewards/margins": 77.26386260986328, "rewards/real": -0.6005492806434631, "step": 4620 }, { "epoch": 2.96, "learning_rate": 6.990521327014218e-09, "logits/generated": -0.42995685338974, "logits/real": -0.6797999739646912, "logps/generated": -1169.283447265625, "logps/real": -131.04364013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -75.65510559082031, "rewards/margins": 75.0411376953125, "rewards/real": -0.6139676570892334, "step": 4630 }, { "epoch": 2.97, "learning_rate": 5.805687203791469e-09, "logits/generated": -0.4121777415275574, "logits/real": -0.6116447448730469, "logps/generated": -1070.22314453125, "logps/real": -140.3560333251953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -66.28459167480469, "rewards/margins": 65.62239074707031, "rewards/real": -0.6622053384780884, "step": 4640 }, { "epoch": 2.98, "learning_rate": 4.62085308056872e-09, "logits/generated": -0.4175810217857361, "logits/real": -0.5604298114776611, "logps/generated": -1161.98583984375, "logps/real": -162.33438110351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -74.65108489990234, "rewards/margins": 74.18379211425781, "rewards/real": -0.4672994017601013, "step": 4650 }, { "epoch": 2.98, "learning_rate": 3.4360189573459714e-09, "logits/generated": -0.3966117799282074, "logits/real": -0.5953450202941895, "logps/generated": -1079.524169921875, "logps/real": -167.3221435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.10150909423828, "rewards/margins": 68.49336242675781, "rewards/real": -0.6081460118293762, "step": 4660 }, { "epoch": 2.99, "learning_rate": 2.2511848341232227e-09, "logits/generated": -0.4167659282684326, "logits/real": -0.6169396042823792, "logps/generated": -1015.7071533203125, "logps/real": -176.66506958007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.38505935668945, "rewards/margins": 60.835716247558594, "rewards/real": -0.5493378043174744, "step": 4670 }, { "epoch": 2.99, "learning_rate": 1.0663507109004738e-09, "logits/generated": -0.3958420157432556, "logits/real": -0.5675554275512695, "logps/generated": -1055.50341796875, "logps/real": -149.56039428710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -66.84716796875, "rewards/margins": 66.41246032714844, "rewards/real": -0.4347153604030609, "step": 4680 }, { "epoch": 3.0, "step": 4689, "total_flos": 0.0, "train_loss": 0.010306433322205334, "train_runtime": 33629.5111, "train_samples_per_second": 4.46, "train_steps_per_second": 0.139 } ], "logging_steps": 10, "max_steps": 4689, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }