{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 4668, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0706638115631692e-09, "logits/generated": -3.0364484786987305, "logits/real": -3.0630810260772705, "logps/generated": -251.72409057617188, "logps/real": -237.75723266601562, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.070663811563169e-08, "logits/generated": -2.9856934547424316, "logits/real": -2.989187240600586, "logps/generated": -390.6001892089844, "logps/real": -373.385498046875, "loss": 0.6943, "rewards/accuracies": 0.4861111044883728, "rewards/generated": -0.004033928737044334, "rewards/margins": 0.010273342952132225, "rewards/real": 0.006239414215087891, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.141327623126338e-08, "logits/generated": -3.0185017585754395, "logits/real": -2.9879310131073, "logps/generated": -393.1386413574219, "logps/real": -348.47198486328125, "loss": 0.6842, "rewards/accuracies": 0.574999988079071, "rewards/generated": 0.009772378951311111, "rewards/margins": 0.008250057697296143, "rewards/real": 0.018022436648607254, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.2119914346895076e-08, "logits/generated": -3.0177969932556152, "logits/real": -3.0238332748413086, "logps/generated": -361.01361083984375, "logps/real": -317.5545654296875, "loss": 0.666, "rewards/accuracies": 0.7124999761581421, "rewards/generated": 0.04016115143895149, "rewards/margins": 0.07257900387048721, "rewards/real": 0.112740159034729, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.282655246252676e-08, "logits/generated": -3.012861967086792, "logits/real": -3.010136604309082, "logps/generated": -404.3400573730469, "logps/real": -321.47833251953125, "loss": 0.6148, "rewards/accuracies": 0.800000011920929, "rewards/generated": 0.1161263957619667, "rewards/margins": 0.16384394466876984, "rewards/real": 0.27997034788131714, "step": 40 }, { "epoch": 0.03, "learning_rate": 5.353319057815846e-08, "logits/generated": -3.006298065185547, "logits/real": -2.9836604595184326, "logps/generated": -386.39251708984375, "logps/real": -344.08502197265625, "loss": 0.5534, "rewards/accuracies": 0.762499988079071, "rewards/generated": 0.17066331207752228, "rewards/margins": 0.3133729100227356, "rewards/real": 0.48403626680374146, "step": 50 }, { "epoch": 0.04, "learning_rate": 6.423982869379015e-08, "logits/generated": -2.9997777938842773, "logits/real": -2.991501808166504, "logps/generated": -408.845703125, "logps/real": -346.44403076171875, "loss": 0.4799, "rewards/accuracies": 0.8374999761581421, "rewards/generated": 0.11928486824035645, "rewards/margins": 0.6128198504447937, "rewards/real": 0.7321046590805054, "step": 60 }, { "epoch": 0.04, "learning_rate": 7.494646680942184e-08, "logits/generated": -2.9945003986358643, "logits/real": -2.9864401817321777, "logps/generated": -417.6007385253906, "logps/real": -394.57769775390625, "loss": 0.412, "rewards/accuracies": 0.824999988079071, "rewards/generated": -0.056093405932188034, "rewards/margins": 1.043709635734558, "rewards/real": 0.9876161813735962, "step": 70 }, { "epoch": 0.05, "learning_rate": 8.565310492505352e-08, "logits/generated": -2.996636390686035, "logits/real": -2.982595682144165, "logps/generated": -379.52105712890625, "logps/real": -327.92724609375, "loss": 0.3944, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -0.12083463370800018, "rewards/margins": 0.9464915990829468, "rewards/real": 0.8256568908691406, "step": 80 }, { "epoch": 0.06, "learning_rate": 9.635974304068522e-08, "logits/generated": -2.9587833881378174, "logits/real": -2.9507949352264404, "logps/generated": -392.19256591796875, "logps/real": -346.1636962890625, "loss": 0.3626, "rewards/accuracies": 0.875, "rewards/generated": -0.1786222904920578, "rewards/margins": 1.2095777988433838, "rewards/real": 1.0309556722640991, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.0706638115631692e-07, "logits/generated": -2.9543347358703613, "logits/real": -2.962299346923828, "logps/generated": -359.78009033203125, "logps/real": -339.92010498046875, "loss": 0.3373, "rewards/accuracies": 0.862500011920929, "rewards/generated": -0.21250668168067932, "rewards/margins": 1.3779737949371338, "rewards/real": 1.1654671430587769, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.177730192719486e-07, "logits/generated": -2.9344518184661865, "logits/real": -2.9429378509521484, "logps/generated": -400.2679748535156, "logps/real": -311.1820983886719, "loss": 0.28, "rewards/accuracies": 0.875, "rewards/generated": -0.47659602761268616, "rewards/margins": 1.5711917877197266, "rewards/real": 1.0945957899093628, "step": 110 }, { "epoch": 0.08, "learning_rate": 1.284796573875803e-07, "logits/generated": -2.9682908058166504, "logits/real": -2.9808101654052734, "logps/generated": -382.69219970703125, "logps/real": -332.9756774902344, "loss": 0.261, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -0.8699382543563843, "rewards/margins": 1.8869482278823853, "rewards/real": 1.017009973526001, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.3918629550321198e-07, "logits/generated": -2.950186014175415, "logits/real": -2.9397127628326416, "logps/generated": -397.447021484375, "logps/real": -314.9403076171875, "loss": 0.2336, "rewards/accuracies": 0.887499988079071, "rewards/generated": -1.2718974351882935, "rewards/margins": 2.2060093879699707, "rewards/real": 0.9341122508049011, "step": 130 }, { "epoch": 0.09, "learning_rate": 1.4989293361884367e-07, "logits/generated": -2.942518711090088, "logits/real": -2.9479198455810547, "logps/generated": -415.5738220214844, "logps/real": -311.505615234375, "loss": 0.2199, "rewards/accuracies": 0.9375, "rewards/generated": -1.8576176166534424, "rewards/margins": 2.6989645957946777, "rewards/real": 0.8413470387458801, "step": 140 }, { "epoch": 0.1, "learning_rate": 1.6059957173447535e-07, "logits/generated": -2.934556007385254, "logits/real": -2.9276509284973145, "logps/generated": -408.669189453125, "logps/real": -376.3493957519531, "loss": 0.2065, "rewards/accuracies": 0.925000011920929, "rewards/generated": -1.4676240682601929, "rewards/margins": 2.4751994609832764, "rewards/real": 1.007575273513794, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.7130620985010704e-07, "logits/generated": -2.9072623252868652, "logits/real": -2.911750078201294, "logps/generated": -457.24188232421875, "logps/real": -372.0235595703125, "loss": 0.173, "rewards/accuracies": 0.949999988079071, "rewards/generated": -2.4295992851257324, "rewards/margins": 3.2849929332733154, "rewards/real": 0.8553940057754517, "step": 160 }, { "epoch": 0.11, "learning_rate": 1.8201284796573874e-07, "logits/generated": -2.930424928665161, "logits/real": -2.917999744415283, "logps/generated": -365.8371887207031, "logps/real": -290.6273498535156, "loss": 0.1936, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -2.619870662689209, "rewards/margins": 2.9378836154937744, "rewards/real": 0.3180127739906311, "step": 170 }, { "epoch": 0.12, "learning_rate": 1.9271948608137044e-07, "logits/generated": -2.9163429737091064, "logits/real": -2.91850209236145, "logps/generated": -413.27288818359375, "logps/real": -344.4951171875, "loss": 0.1629, "rewards/accuracies": 0.9375, "rewards/generated": -3.2495665550231934, "rewards/margins": 3.9354729652404785, "rewards/real": 0.6859063506126404, "step": 180 }, { "epoch": 0.12, "learning_rate": 2.0342612419700214e-07, "logits/generated": -2.914929151535034, "logits/real": -2.8999643325805664, "logps/generated": -455.07135009765625, "logps/real": -330.7913818359375, "loss": 0.1497, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -3.547805070877075, "rewards/margins": 3.8215222358703613, "rewards/real": 0.27371746301651, "step": 190 }, { "epoch": 0.13, "learning_rate": 2.1413276231263384e-07, "logits/generated": -2.898618698120117, "logits/real": -2.8733317852020264, "logps/generated": -466.3199768066406, "logps/real": -360.9929504394531, "loss": 0.1471, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.219028472900391, "rewards/margins": 4.406793594360352, "rewards/real": 0.18776562809944153, "step": 200 }, { "epoch": 0.13, "learning_rate": 2.248394004282655e-07, "logits/generated": -2.8917415142059326, "logits/real": -2.8755955696105957, "logps/generated": -410.87451171875, "logps/real": -356.9337463378906, "loss": 0.1496, "rewards/accuracies": 0.925000011920929, "rewards/generated": -4.3014631271362305, "rewards/margins": 4.1496405601501465, "rewards/real": -0.1518225222826004, "step": 210 }, { "epoch": 0.14, "learning_rate": 2.355460385438972e-07, "logits/generated": -2.8925869464874268, "logits/real": -2.8660550117492676, "logps/generated": -421.72430419921875, "logps/real": -346.0505065917969, "loss": 0.1502, "rewards/accuracies": 0.925000011920929, "rewards/generated": -4.356566429138184, "rewards/margins": 4.137876987457275, "rewards/real": -0.21868903934955597, "step": 220 }, { "epoch": 0.15, "learning_rate": 2.462526766595289e-07, "logits/generated": -2.891606569290161, "logits/real": -2.8733668327331543, "logps/generated": -414.7857360839844, "logps/real": -352.6590881347656, "loss": 0.1554, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -4.677205562591553, "rewards/margins": 4.315895080566406, "rewards/real": -0.3613104224205017, "step": 230 }, { "epoch": 0.15, "learning_rate": 2.569593147751606e-07, "logits/generated": -2.8873989582061768, "logits/real": -2.8477070331573486, "logps/generated": -446.8787536621094, "logps/real": -355.8851013183594, "loss": 0.1372, "rewards/accuracies": 0.925000011920929, "rewards/generated": -4.631860256195068, "rewards/margins": 4.302509784698486, "rewards/real": -0.3293505311012268, "step": 240 }, { "epoch": 0.16, "learning_rate": 2.676659528907923e-07, "logits/generated": -2.8453519344329834, "logits/real": -2.813788652420044, "logps/generated": -419.0545959472656, "logps/real": -332.58184814453125, "loss": 0.1234, "rewards/accuracies": 0.949999988079071, "rewards/generated": -4.903704643249512, "rewards/margins": 4.795269966125488, "rewards/real": -0.10843384265899658, "step": 250 }, { "epoch": 0.17, "learning_rate": 2.7837259100642395e-07, "logits/generated": -2.826523542404175, "logits/real": -2.7952752113342285, "logps/generated": -454.9879455566406, "logps/real": -370.6460876464844, "loss": 0.1294, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -5.665987491607666, "rewards/margins": 4.868983268737793, "rewards/real": -0.7970041632652283, "step": 260 }, { "epoch": 0.17, "learning_rate": 2.890792291220557e-07, "logits/generated": -2.7900607585906982, "logits/real": -2.7720229625701904, "logps/generated": -477.99053955078125, "logps/real": -324.0495300292969, "loss": 0.138, "rewards/accuracies": 0.9375, "rewards/generated": -6.587684631347656, "rewards/margins": 5.73649787902832, "rewards/real": -0.8511865735054016, "step": 270 }, { "epoch": 0.18, "learning_rate": 2.9978586723768735e-07, "logits/generated": -2.7956130504608154, "logits/real": -2.7502362728118896, "logps/generated": -441.34320068359375, "logps/real": -338.3302917480469, "loss": 0.1215, "rewards/accuracies": 0.887499988079071, "rewards/generated": -5.030495643615723, "rewards/margins": 4.927591800689697, "rewards/real": -0.10290361940860748, "step": 280 }, { "epoch": 0.19, "learning_rate": 3.1049250535331905e-07, "logits/generated": -2.776495933532715, "logits/real": -2.7354507446289062, "logps/generated": -443.36163330078125, "logps/real": -347.2231750488281, "loss": 0.122, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -5.4566240310668945, "rewards/margins": 5.132528781890869, "rewards/real": -0.32409486174583435, "step": 290 }, { "epoch": 0.19, "learning_rate": 3.211991434689507e-07, "logits/generated": -2.705543041229248, "logits/real": -2.6740341186523438, "logps/generated": -402.8146667480469, "logps/real": -273.10748291015625, "loss": 0.0935, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -6.659946441650391, "rewards/margins": 5.823763847351074, "rewards/real": -0.8361822962760925, "step": 300 }, { "epoch": 0.2, "learning_rate": 3.3190578158458244e-07, "logits/generated": -2.7414534091949463, "logits/real": -2.694044589996338, "logps/generated": -466.59375, "logps/real": -307.7261657714844, "loss": 0.1304, "rewards/accuracies": 0.987500011920929, "rewards/generated": -6.890405178070068, "rewards/margins": 6.506340980529785, "rewards/real": -0.3840644359588623, "step": 310 }, { "epoch": 0.21, "learning_rate": 3.426124197002141e-07, "logits/generated": -2.7618134021759033, "logits/real": -2.7209994792938232, "logps/generated": -477.1549377441406, "logps/real": -336.30950927734375, "loss": 0.1194, "rewards/accuracies": 0.925000011920929, "rewards/generated": -7.347966194152832, "rewards/margins": 6.844850063323975, "rewards/real": -0.5031148791313171, "step": 320 }, { "epoch": 0.21, "learning_rate": 3.533190578158458e-07, "logits/generated": -2.769962787628174, "logits/real": -2.6862077713012695, "logps/generated": -476.4065856933594, "logps/real": -356.8155517578125, "loss": 0.1259, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -6.444831848144531, "rewards/margins": 6.0999369621276855, "rewards/real": -0.3448948264122009, "step": 330 }, { "epoch": 0.22, "learning_rate": 3.640256959314775e-07, "logits/generated": -2.730198383331299, "logits/real": -2.729130268096924, "logps/generated": -452.85626220703125, "logps/real": -316.28997802734375, "loss": 0.1254, "rewards/accuracies": 0.9375, "rewards/generated": -6.9878082275390625, "rewards/margins": 6.373122215270996, "rewards/real": -0.6146861910820007, "step": 340 }, { "epoch": 0.22, "learning_rate": 3.747323340471092e-07, "logits/generated": -2.700066566467285, "logits/real": -2.6724190711975098, "logps/generated": -458.28857421875, "logps/real": -329.33203125, "loss": 0.1234, "rewards/accuracies": 0.9375, "rewards/generated": -5.48982048034668, "rewards/margins": 5.636635780334473, "rewards/real": 0.14681576192378998, "step": 350 }, { "epoch": 0.23, "learning_rate": 3.854389721627409e-07, "logits/generated": -2.709381103515625, "logits/real": -2.6651294231414795, "logps/generated": -462.7777404785156, "logps/real": -367.77740478515625, "loss": 0.0883, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -6.926550388336182, "rewards/margins": 6.700352668762207, "rewards/real": -0.22619831562042236, "step": 360 }, { "epoch": 0.24, "learning_rate": 3.961456102783726e-07, "logits/generated": -2.7424886226654053, "logits/real": -2.6503500938415527, "logps/generated": -489.62542724609375, "logps/real": -340.3368225097656, "loss": 0.1383, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -7.2433366775512695, "rewards/margins": 6.5567626953125, "rewards/real": -0.6865738034248352, "step": 370 }, { "epoch": 0.24, "learning_rate": 4.068522483940043e-07, "logits/generated": -2.6518828868865967, "logits/real": -2.578998327255249, "logps/generated": -491.3388671875, "logps/real": -354.8604736328125, "loss": 0.1068, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.11082935333252, "rewards/margins": 6.844748020172119, "rewards/real": -1.2660824060440063, "step": 380 }, { "epoch": 0.25, "learning_rate": 4.175588865096359e-07, "logits/generated": -2.6482059955596924, "logits/real": -2.593445301055908, "logps/generated": -458.67864990234375, "logps/real": -341.2268981933594, "loss": 0.1243, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.110146999359131, "rewards/margins": 6.503669738769531, "rewards/real": -0.6064783334732056, "step": 390 }, { "epoch": 0.26, "learning_rate": 4.282655246252677e-07, "logits/generated": -2.6590518951416016, "logits/real": -2.589247703552246, "logps/generated": -485.0723571777344, "logps/real": -371.298828125, "loss": 0.1143, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -7.317061424255371, "rewards/margins": 6.85882568359375, "rewards/real": -0.4582356810569763, "step": 400 }, { "epoch": 0.26, "learning_rate": 4.389721627408993e-07, "logits/generated": -2.657744884490967, "logits/real": -2.634624481201172, "logps/generated": -440.25396728515625, "logps/real": -309.36871337890625, "loss": 0.1418, "rewards/accuracies": 0.925000011920929, "rewards/generated": -6.629510402679443, "rewards/margins": 6.084707260131836, "rewards/real": -0.5448042154312134, "step": 410 }, { "epoch": 0.27, "learning_rate": 4.49678800856531e-07, "logits/generated": -2.7457222938537598, "logits/real": -2.698697566986084, "logps/generated": -477.81475830078125, "logps/real": -359.94305419921875, "loss": 0.118, "rewards/accuracies": 0.9375, "rewards/generated": -5.93577241897583, "rewards/margins": 6.0500030517578125, "rewards/real": 0.11423077434301376, "step": 420 }, { "epoch": 0.28, "learning_rate": 4.603854389721627e-07, "logits/generated": -2.7255027294158936, "logits/real": -2.6430881023406982, "logps/generated": -491.6402282714844, "logps/real": -329.3466796875, "loss": 0.1015, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -6.748249053955078, "rewards/margins": 6.983546257019043, "rewards/real": 0.23529770970344543, "step": 430 }, { "epoch": 0.28, "learning_rate": 4.710920770877944e-07, "logits/generated": -2.727783679962158, "logits/real": -2.6885695457458496, "logps/generated": -479.18585205078125, "logps/real": -303.29132080078125, "loss": 0.1195, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -7.20239782333374, "rewards/margins": 6.955704689025879, "rewards/real": -0.24669349193572998, "step": 440 }, { "epoch": 0.29, "learning_rate": 4.817987152034261e-07, "logits/generated": -2.704719305038452, "logits/real": -2.672463893890381, "logps/generated": -448.4974060058594, "logps/real": -331.91583251953125, "loss": 0.1325, "rewards/accuracies": 0.949999988079071, "rewards/generated": -6.272010326385498, "rewards/margins": 5.807042121887207, "rewards/real": -0.4649685323238373, "step": 450 }, { "epoch": 0.3, "learning_rate": 4.925053533190578e-07, "logits/generated": -2.7187066078186035, "logits/real": -2.650846004486084, "logps/generated": -499.6753845214844, "logps/real": -356.98614501953125, "loss": 0.1159, "rewards/accuracies": 0.949999988079071, "rewards/generated": -8.36271858215332, "rewards/margins": 7.270302772521973, "rewards/real": -1.092416524887085, "step": 460 }, { "epoch": 0.3, "learning_rate": 4.996429421566293e-07, "logits/generated": -2.65824556350708, "logits/real": -2.5810179710388184, "logps/generated": -515.4708862304688, "logps/real": -335.44573974609375, "loss": 0.1184, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -9.19658088684082, "rewards/margins": 8.151226043701172, "rewards/real": -1.0453550815582275, "step": 470 }, { "epoch": 0.31, "learning_rate": 4.98452749345394e-07, "logits/generated": -2.572777032852173, "logits/real": -2.529254913330078, "logps/generated": -461.7464294433594, "logps/real": -406.6604309082031, "loss": 0.0779, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.338438034057617, "rewards/margins": 7.123196601867676, "rewards/real": -1.2152409553527832, "step": 480 }, { "epoch": 0.31, "learning_rate": 4.972625565341585e-07, "logits/generated": -2.5586600303649902, "logits/real": -2.4877161979675293, "logps/generated": -430.3533630371094, "logps/real": -323.3705749511719, "loss": 0.0999, "rewards/accuracies": 0.9375, "rewards/generated": -7.9221014976501465, "rewards/margins": 7.004052639007568, "rewards/real": -0.9180490374565125, "step": 490 }, { "epoch": 0.32, "learning_rate": 4.960723637229232e-07, "logits/generated": -2.5836546421051025, "logits/real": -2.552192211151123, "logps/generated": -456.425537109375, "logps/real": -340.4407958984375, "loss": 0.0722, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -8.413108825683594, "rewards/margins": 7.556717872619629, "rewards/real": -0.8563922047615051, "step": 500 }, { "epoch": 0.33, "learning_rate": 4.948821709116876e-07, "logits/generated": -2.632378101348877, "logits/real": -2.586066246032715, "logps/generated": -491.17681884765625, "logps/real": -358.8891906738281, "loss": 0.1264, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -9.601828575134277, "rewards/margins": 8.468477249145508, "rewards/real": -1.1333516836166382, "step": 510 }, { "epoch": 0.33, "learning_rate": 4.936919781004522e-07, "logits/generated": -2.6995837688446045, "logits/real": -2.663362979888916, "logps/generated": -473.89410400390625, "logps/real": -377.1138610839844, "loss": 0.1473, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -8.111103057861328, "rewards/margins": 7.439300537109375, "rewards/real": -0.6718028783798218, "step": 520 }, { "epoch": 0.34, "learning_rate": 4.925017852892168e-07, "logits/generated": -2.7533721923828125, "logits/real": -2.7160682678222656, "logps/generated": -463.5284729003906, "logps/real": -364.9224548339844, "loss": 0.1281, "rewards/accuracies": 0.949999988079071, "rewards/generated": -6.726442813873291, "rewards/margins": 7.018113136291504, "rewards/real": 0.29166945815086365, "step": 530 }, { "epoch": 0.35, "learning_rate": 4.913115924779814e-07, "logits/generated": -2.692500352859497, "logits/real": -2.6972968578338623, "logps/generated": -485.6321716308594, "logps/real": -365.08599853515625, "loss": 0.093, "rewards/accuracies": 0.949999988079071, "rewards/generated": -7.418447971343994, "rewards/margins": 6.375821113586426, "rewards/real": -1.0426270961761475, "step": 540 }, { "epoch": 0.35, "learning_rate": 4.90121399666746e-07, "logits/generated": -2.6083171367645264, "logits/real": -2.5617499351501465, "logps/generated": -464.82916259765625, "logps/real": -343.98126220703125, "loss": 0.0843, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -7.705323696136475, "rewards/margins": 7.201271057128906, "rewards/real": -0.504052996635437, "step": 550 }, { "epoch": 0.36, "learning_rate": 4.889312068555106e-07, "logits/generated": -2.628220558166504, "logits/real": -2.526280403137207, "logps/generated": -502.65032958984375, "logps/real": -313.36639404296875, "loss": 0.1052, "rewards/accuracies": 0.949999988079071, "rewards/generated": -9.855968475341797, "rewards/margins": 8.44670295715332, "rewards/real": -1.4092657566070557, "step": 560 }, { "epoch": 0.37, "learning_rate": 4.877410140442752e-07, "logits/generated": -2.6273674964904785, "logits/real": -2.5923876762390137, "logps/generated": -443.7357482910156, "logps/real": -344.1656188964844, "loss": 0.1226, "rewards/accuracies": 0.9375, "rewards/generated": -9.525670051574707, "rewards/margins": 7.985457420349121, "rewards/real": -1.5402114391326904, "step": 570 }, { "epoch": 0.37, "learning_rate": 4.865508212330398e-07, "logits/generated": -2.734830379486084, "logits/real": -2.637960433959961, "logps/generated": -488.81781005859375, "logps/real": -388.02618408203125, "loss": 0.1234, "rewards/accuracies": 0.925000011920929, "rewards/generated": -8.961756706237793, "rewards/margins": 8.064661979675293, "rewards/real": -0.8970959782600403, "step": 580 }, { "epoch": 0.38, "learning_rate": 4.853606284218044e-07, "logits/generated": -2.6835715770721436, "logits/real": -2.6238436698913574, "logps/generated": -435.009033203125, "logps/real": -347.2498474121094, "loss": 0.08, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -7.5509772300720215, "rewards/margins": 7.190362453460693, "rewards/real": -0.3606160879135132, "step": 590 }, { "epoch": 0.39, "learning_rate": 4.841704356105689e-07, "logits/generated": -2.667619466781616, "logits/real": -2.594552516937256, "logps/generated": -485.05596923828125, "logps/real": -357.52520751953125, "loss": 0.1011, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -8.562358856201172, "rewards/margins": 7.882147789001465, "rewards/real": -0.6802110075950623, "step": 600 }, { "epoch": 0.39, "learning_rate": 4.829802427993334e-07, "logits/generated": -2.7254223823547363, "logits/real": -2.674651861190796, "logps/generated": -486.1986389160156, "logps/real": -366.41973876953125, "loss": 0.1319, "rewards/accuracies": 0.949999988079071, "rewards/generated": -9.714421272277832, "rewards/margins": 8.38983154296875, "rewards/real": -1.3245890140533447, "step": 610 }, { "epoch": 0.4, "learning_rate": 4.81790049988098e-07, "logits/generated": -2.7993836402893066, "logits/real": -2.6992013454437256, "logps/generated": -477.964111328125, "logps/real": -352.30255126953125, "loss": 0.1332, "rewards/accuracies": 0.925000011920929, "rewards/generated": -9.314876556396484, "rewards/margins": 8.130788803100586, "rewards/real": -1.1840870380401611, "step": 620 }, { "epoch": 0.4, "learning_rate": 4.805998571768626e-07, "logits/generated": -2.7570552825927734, "logits/real": -2.722883701324463, "logps/generated": -466.1041564941406, "logps/real": -327.46435546875, "loss": 0.0768, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -10.392984390258789, "rewards/margins": 8.88233757019043, "rewards/real": -1.510647177696228, "step": 630 }, { "epoch": 0.41, "learning_rate": 4.794096643656272e-07, "logits/generated": -2.7757675647735596, "logits/real": -2.696953535079956, "logps/generated": -573.4818115234375, "logps/real": -451.28277587890625, "loss": 0.0841, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -10.725347518920898, "rewards/margins": 9.061750411987305, "rewards/real": -1.6635980606079102, "step": 640 }, { "epoch": 0.42, "learning_rate": 4.782194715543918e-07, "logits/generated": -2.754331111907959, "logits/real": -2.675215482711792, "logps/generated": -536.6560668945312, "logps/real": -378.1103515625, "loss": 0.1631, "rewards/accuracies": 0.925000011920929, "rewards/generated": -9.196868896484375, "rewards/margins": 8.15418815612793, "rewards/real": -1.0426809787750244, "step": 650 }, { "epoch": 0.42, "learning_rate": 4.770292787431564e-07, "logits/generated": -2.869988441467285, "logits/real": -2.751678943634033, "logps/generated": -494.60772705078125, "logps/real": -374.7413024902344, "loss": 0.1045, "rewards/accuracies": 0.949999988079071, "rewards/generated": -8.500930786132812, "rewards/margins": 7.580558776855469, "rewards/real": -0.920372486114502, "step": 660 }, { "epoch": 0.43, "learning_rate": 4.7583908593192097e-07, "logits/generated": -2.8370208740234375, "logits/real": -2.6929588317871094, "logps/generated": -486.40533447265625, "logps/real": -372.7452087402344, "loss": 0.0754, "rewards/accuracies": 0.949999988079071, "rewards/generated": -9.398336410522461, "rewards/margins": 8.141416549682617, "rewards/real": -1.2569185495376587, "step": 670 }, { "epoch": 0.44, "learning_rate": 4.746488931206855e-07, "logits/generated": -2.7330057621002197, "logits/real": -2.7403512001037598, "logps/generated": -512.094970703125, "logps/real": -335.0165100097656, "loss": 0.0968, "rewards/accuracies": 1.0, "rewards/generated": -10.172496795654297, "rewards/margins": 9.116006851196289, "rewards/real": -1.056490182876587, "step": 680 }, { "epoch": 0.44, "learning_rate": 4.734587003094501e-07, "logits/generated": -2.8084769248962402, "logits/real": -2.7017087936401367, "logps/generated": -514.4985961914062, "logps/real": -370.76409912109375, "loss": 0.075, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -9.885915756225586, "rewards/margins": 8.93104076385498, "rewards/real": -0.954875111579895, "step": 690 }, { "epoch": 0.45, "learning_rate": 4.722685074982147e-07, "logits/generated": -2.7886388301849365, "logits/real": -2.7220773696899414, "logps/generated": -485.0414123535156, "logps/real": -330.4255065917969, "loss": 0.1032, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -10.818674087524414, "rewards/margins": 9.245843887329102, "rewards/real": -1.5728291273117065, "step": 700 }, { "epoch": 0.46, "learning_rate": 4.710783146869793e-07, "logits/generated": -2.8421998023986816, "logits/real": -2.718574047088623, "logps/generated": -492.8663024902344, "logps/real": -378.52239990234375, "loss": 0.1112, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -10.291857719421387, "rewards/margins": 9.014776229858398, "rewards/real": -1.277081847190857, "step": 710 }, { "epoch": 0.46, "learning_rate": 4.698881218757438e-07, "logits/generated": -2.7403712272644043, "logits/real": -2.693634271621704, "logps/generated": -450.5480041503906, "logps/real": -318.03704833984375, "loss": 0.0636, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.008447647094727, "rewards/margins": 8.986620903015137, "rewards/real": -2.021826982498169, "step": 720 }, { "epoch": 0.47, "learning_rate": 4.6869792906450845e-07, "logits/generated": -2.738825559616089, "logits/real": -2.6762800216674805, "logps/generated": -516.190673828125, "logps/real": -378.79364013671875, "loss": 0.0617, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -11.853250503540039, "rewards/margins": 9.828583717346191, "rewards/real": -2.0246665477752686, "step": 730 }, { "epoch": 0.48, "learning_rate": 4.67507736253273e-07, "logits/generated": -2.772712230682373, "logits/real": -2.7906768321990967, "logps/generated": -503.3374938964844, "logps/real": -324.5931396484375, "loss": 0.075, "rewards/accuracies": 0.949999988079071, "rewards/generated": -11.763754844665527, "rewards/margins": 10.069581985473633, "rewards/real": -1.694173812866211, "step": 740 }, { "epoch": 0.48, "learning_rate": 4.6631754344203763e-07, "logits/generated": -2.74733829498291, "logits/real": -2.7517824172973633, "logps/generated": -493.4088439941406, "logps/real": -340.09991455078125, "loss": 0.0989, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -10.747503280639648, "rewards/margins": 8.963977813720703, "rewards/real": -1.7835248708724976, "step": 750 }, { "epoch": 0.49, "learning_rate": 4.6512735063080217e-07, "logits/generated": -2.7257447242736816, "logits/real": -2.718722343444824, "logps/generated": -498.18255615234375, "logps/real": -334.10333251953125, "loss": 0.1144, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.353604316711426, "rewards/margins": 9.699551582336426, "rewards/real": -1.654052495956421, "step": 760 }, { "epoch": 0.49, "learning_rate": 4.6393715781956676e-07, "logits/generated": -2.699676990509033, "logits/real": -2.6231935024261475, "logps/generated": -525.2720336914062, "logps/real": -386.4446716308594, "loss": 0.0671, "rewards/accuracies": 1.0, "rewards/generated": -10.2306547164917, "rewards/margins": 9.587125778198242, "rewards/real": -0.6435292959213257, "step": 770 }, { "epoch": 0.5, "learning_rate": 4.6274696500833135e-07, "logits/generated": -2.677091360092163, "logits/real": -2.612697124481201, "logps/generated": -524.1741943359375, "logps/real": -354.40130615234375, "loss": 0.1003, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.090825080871582, "rewards/margins": 9.66313362121582, "rewards/real": -1.4276920557022095, "step": 780 }, { "epoch": 0.51, "learning_rate": 4.6155677219709594e-07, "logits/generated": -2.6755471229553223, "logits/real": -2.562119960784912, "logps/generated": -477.6163024902344, "logps/real": -376.70867919921875, "loss": 0.0953, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -10.81368350982666, "rewards/margins": 9.579755783081055, "rewards/real": -1.2339270114898682, "step": 790 }, { "epoch": 0.51, "learning_rate": 4.603665793858605e-07, "logits/generated": -2.6505370140075684, "logits/real": -2.5655438899993896, "logps/generated": -453.52276611328125, "logps/real": -346.87591552734375, "loss": 0.1396, "rewards/accuracies": 0.9375, "rewards/generated": -10.680634498596191, "rewards/margins": 8.618191719055176, "rewards/real": -2.062441825866699, "step": 800 }, { "epoch": 0.52, "learning_rate": 4.5917638657462507e-07, "logits/generated": -2.5472962856292725, "logits/real": -2.4991610050201416, "logps/generated": -514.5006103515625, "logps/real": -402.99920654296875, "loss": 0.1054, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -10.380758285522461, "rewards/margins": 8.436528205871582, "rewards/real": -1.9442304372787476, "step": 810 }, { "epoch": 0.53, "learning_rate": 4.5798619376338966e-07, "logits/generated": -2.5719590187072754, "logits/real": -2.519853115081787, "logps/generated": -456.893310546875, "logps/real": -343.7867431640625, "loss": 0.1149, "rewards/accuracies": 0.9375, "rewards/generated": -10.286421775817871, "rewards/margins": 8.027682304382324, "rewards/real": -2.258739471435547, "step": 820 }, { "epoch": 0.53, "learning_rate": 4.567960009521542e-07, "logits/generated": -2.5605781078338623, "logits/real": -2.501222610473633, "logps/generated": -433.1905212402344, "logps/real": -311.0120544433594, "loss": 0.0988, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -10.496920585632324, "rewards/margins": 8.129188537597656, "rewards/real": -2.367732048034668, "step": 830 }, { "epoch": 0.54, "learning_rate": 4.5560580814091884e-07, "logits/generated": -2.565680503845215, "logits/real": -2.4589760303497314, "logps/generated": -498.48583984375, "logps/real": -353.51470947265625, "loss": 0.0877, "rewards/accuracies": 0.949999988079071, "rewards/generated": -12.815507888793945, "rewards/margins": 10.009346961975098, "rewards/real": -2.8061606884002686, "step": 840 }, { "epoch": 0.55, "learning_rate": 4.5441561532968337e-07, "logits/generated": -2.524672746658325, "logits/real": -2.4803435802459717, "logps/generated": -510.71533203125, "logps/real": -395.97503662109375, "loss": 0.1072, "rewards/accuracies": 0.9375, "rewards/generated": -10.569852828979492, "rewards/margins": 8.171304702758789, "rewards/real": -2.398545742034912, "step": 850 }, { "epoch": 0.55, "learning_rate": 4.5322542251844796e-07, "logits/generated": -2.4687283039093018, "logits/real": -2.4027516841888428, "logps/generated": -550.343017578125, "logps/real": -387.80523681640625, "loss": 0.0655, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.694091796875, "rewards/margins": 10.239914894104004, "rewards/real": -3.454176425933838, "step": 860 }, { "epoch": 0.56, "learning_rate": 4.5203522970721255e-07, "logits/generated": -2.4605631828308105, "logits/real": -2.317789077758789, "logps/generated": -515.8772583007812, "logps/real": -373.03094482421875, "loss": 0.0921, "rewards/accuracies": 0.949999988079071, "rewards/generated": -13.49272346496582, "rewards/margins": 9.03429126739502, "rewards/real": -4.458432197570801, "step": 870 }, { "epoch": 0.57, "learning_rate": 4.5084503689597714e-07, "logits/generated": -2.5647082328796387, "logits/real": -2.478529453277588, "logps/generated": -497.2862854003906, "logps/real": -409.93402099609375, "loss": 0.1512, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.405123710632324, "rewards/margins": 8.666096687316895, "rewards/real": -1.7390273809432983, "step": 880 }, { "epoch": 0.57, "learning_rate": 4.496548440847417e-07, "logits/generated": -2.4828336238861084, "logits/real": -2.413994550704956, "logps/generated": -457.74127197265625, "logps/real": -331.61566162109375, "loss": 0.107, "rewards/accuracies": 0.9375, "rewards/generated": -10.047932624816895, "rewards/margins": 8.361185073852539, "rewards/real": -1.6867475509643555, "step": 890 }, { "epoch": 0.58, "learning_rate": 4.484646512735063e-07, "logits/generated": -2.5370724201202393, "logits/real": -2.4653468132019043, "logps/generated": -478.68011474609375, "logps/real": -370.4181213378906, "loss": 0.1372, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -10.04428768157959, "rewards/margins": 8.153284072875977, "rewards/real": -1.8910045623779297, "step": 900 }, { "epoch": 0.58, "learning_rate": 4.4727445846227086e-07, "logits/generated": -2.500948667526245, "logits/real": -2.420001745223999, "logps/generated": -470.17803955078125, "logps/real": -346.50494384765625, "loss": 0.0979, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.731805801391602, "rewards/margins": 8.218327522277832, "rewards/real": -1.5134775638580322, "step": 910 }, { "epoch": 0.59, "learning_rate": 4.4608426565103545e-07, "logits/generated": -2.4795870780944824, "logits/real": -2.42265248298645, "logps/generated": -478.30303955078125, "logps/real": -338.5771789550781, "loss": 0.1485, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.962733268737793, "rewards/margins": 8.512125968933105, "rewards/real": -1.4506077766418457, "step": 920 }, { "epoch": 0.6, "learning_rate": 4.4489407283980004e-07, "logits/generated": -2.4491372108459473, "logits/real": -2.4447901248931885, "logps/generated": -483.40020751953125, "logps/real": -362.98577880859375, "loss": 0.1104, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -11.003097534179688, "rewards/margins": 9.003512382507324, "rewards/real": -1.9995838403701782, "step": 930 }, { "epoch": 0.6, "learning_rate": 4.437038800285646e-07, "logits/generated": -2.459510326385498, "logits/real": -2.4044106006622314, "logps/generated": -475.15625, "logps/real": -334.83526611328125, "loss": 0.1292, "rewards/accuracies": 0.9375, "rewards/generated": -9.873337745666504, "rewards/margins": 8.043792724609375, "rewards/real": -1.829545021057129, "step": 940 }, { "epoch": 0.61, "learning_rate": 4.4251368721732916e-07, "logits/generated": -2.455169200897217, "logits/real": -2.3593177795410156, "logps/generated": -560.1038208007812, "logps/real": -385.29278564453125, "loss": 0.1159, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.663183212280273, "rewards/margins": 9.093454360961914, "rewards/real": -2.569728136062622, "step": 950 }, { "epoch": 0.62, "learning_rate": 4.413234944060938e-07, "logits/generated": -2.4482192993164062, "logits/real": -2.3591175079345703, "logps/generated": -466.2801208496094, "logps/real": -324.6377868652344, "loss": 0.138, "rewards/accuracies": 0.9375, "rewards/generated": -10.816349029541016, "rewards/margins": 8.520627975463867, "rewards/real": -2.2957208156585693, "step": 960 }, { "epoch": 0.62, "learning_rate": 4.4013330159485834e-07, "logits/generated": -2.435135841369629, "logits/real": -2.4362385272979736, "logps/generated": -432.5603942871094, "logps/real": -325.52471923828125, "loss": 0.1216, "rewards/accuracies": 0.9375, "rewards/generated": -10.173177719116211, "rewards/margins": 7.554961204528809, "rewards/real": -2.6182148456573486, "step": 970 }, { "epoch": 0.63, "learning_rate": 4.3894310878362293e-07, "logits/generated": -2.3807005882263184, "logits/real": -2.354038715362549, "logps/generated": -488.814208984375, "logps/real": -333.68096923828125, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/generated": -11.279688835144043, "rewards/margins": 8.787554740905762, "rewards/real": -2.4921340942382812, "step": 980 }, { "epoch": 0.64, "learning_rate": 4.377529159723875e-07, "logits/generated": -2.4375739097595215, "logits/real": -2.33906888961792, "logps/generated": -506.0091247558594, "logps/real": -340.0153503417969, "loss": 0.0866, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.742327690124512, "rewards/margins": 9.213804244995117, "rewards/real": -2.5285239219665527, "step": 990 }, { "epoch": 0.64, "learning_rate": 4.365627231611521e-07, "logits/generated": -2.373945951461792, "logits/real": -2.355705738067627, "logps/generated": -559.3948364257812, "logps/real": -378.2243957519531, "loss": 0.1036, "rewards/accuracies": 0.949999988079071, "rewards/generated": -13.373077392578125, "rewards/margins": 11.117111206054688, "rewards/real": -2.255967617034912, "step": 1000 }, { "epoch": 0.65, "learning_rate": 4.3537253034991665e-07, "logits/generated": -2.3554375171661377, "logits/real": -2.2880444526672363, "logps/generated": -538.6488037109375, "logps/real": -424.7191467285156, "loss": 0.1104, "rewards/accuracies": 0.949999988079071, "rewards/generated": -12.977459907531738, "rewards/margins": 9.986202239990234, "rewards/real": -2.9912569522857666, "step": 1010 }, { "epoch": 0.66, "learning_rate": 4.3418233753868124e-07, "logits/generated": -2.3351311683654785, "logits/real": -2.3256137371063232, "logps/generated": -551.7449340820312, "logps/real": -456.4244079589844, "loss": 0.0951, "rewards/accuracies": 0.949999988079071, "rewards/generated": -11.744857788085938, "rewards/margins": 10.64583969116211, "rewards/real": -1.0990195274353027, "step": 1020 }, { "epoch": 0.66, "learning_rate": 4.3299214472744583e-07, "logits/generated": -2.449162006378174, "logits/real": -2.3387961387634277, "logps/generated": -493.4405822753906, "logps/real": -366.27191162109375, "loss": 0.0719, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -11.17166519165039, "rewards/margins": 9.858702659606934, "rewards/real": -1.3129618167877197, "step": 1030 }, { "epoch": 0.67, "learning_rate": 4.3180195191621036e-07, "logits/generated": -2.372615337371826, "logits/real": -2.357564687728882, "logps/generated": -506.31890869140625, "logps/real": -382.11859130859375, "loss": 0.1139, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -10.259264945983887, "rewards/margins": 8.731651306152344, "rewards/real": -1.5276130437850952, "step": 1040 }, { "epoch": 0.67, "learning_rate": 4.30611759104975e-07, "logits/generated": -2.4573209285736084, "logits/real": -2.3666205406188965, "logps/generated": -515.4904174804688, "logps/real": -379.75238037109375, "loss": 0.0756, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.583343505859375, "rewards/margins": 10.160721778869629, "rewards/real": -2.422621011734009, "step": 1050 }, { "epoch": 0.68, "learning_rate": 4.2942156629373954e-07, "logits/generated": -2.316649913787842, "logits/real": -2.2139687538146973, "logps/generated": -532.4139404296875, "logps/real": -382.7889404296875, "loss": 0.0792, "rewards/accuracies": 0.9375, "rewards/generated": -13.652563095092773, "rewards/margins": 9.413002967834473, "rewards/real": -4.239560604095459, "step": 1060 }, { "epoch": 0.69, "learning_rate": 4.2823137348250413e-07, "logits/generated": -2.2381844520568848, "logits/real": -2.1988272666931152, "logps/generated": -531.000244140625, "logps/real": -389.1220397949219, "loss": 0.0604, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -13.427118301391602, "rewards/margins": 10.272978782653809, "rewards/real": -3.1541380882263184, "step": 1070 }, { "epoch": 0.69, "learning_rate": 4.270411806712687e-07, "logits/generated": -2.3720908164978027, "logits/real": -2.348632335662842, "logps/generated": -521.8538818359375, "logps/real": -375.3919677734375, "loss": 0.0944, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.00294017791748, "rewards/margins": 10.072427749633789, "rewards/real": -2.930511474609375, "step": 1080 }, { "epoch": 0.7, "learning_rate": 4.258509878600333e-07, "logits/generated": -2.3700733184814453, "logits/real": -2.3018269538879395, "logps/generated": -437.77056884765625, "logps/real": -336.5913391113281, "loss": 0.1074, "rewards/accuracies": 0.925000011920929, "rewards/generated": -10.549997329711914, "rewards/margins": 8.99598217010498, "rewards/real": -1.5540151596069336, "step": 1090 }, { "epoch": 0.71, "learning_rate": 4.2466079504879785e-07, "logits/generated": -2.4425048828125, "logits/real": -2.3580105304718018, "logps/generated": -480.294677734375, "logps/real": -348.6474914550781, "loss": 0.1287, "rewards/accuracies": 0.987500011920929, "rewards/generated": -10.765533447265625, "rewards/margins": 8.889466285705566, "rewards/real": -1.876068115234375, "step": 1100 }, { "epoch": 0.71, "learning_rate": 4.234706022375625e-07, "logits/generated": -2.4600212574005127, "logits/real": -2.46691632270813, "logps/generated": -464.4510803222656, "logps/real": -327.79296875, "loss": 0.1345, "rewards/accuracies": 0.925000011920929, "rewards/generated": -10.468949317932129, "rewards/margins": 8.587320327758789, "rewards/real": -1.8816286325454712, "step": 1110 }, { "epoch": 0.72, "learning_rate": 4.2228040942632703e-07, "logits/generated": -2.4465115070343018, "logits/real": -2.4585764408111572, "logps/generated": -460.46795654296875, "logps/real": -305.36920166015625, "loss": 0.1342, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -9.80966854095459, "rewards/margins": 7.955733299255371, "rewards/real": -1.8539355993270874, "step": 1120 }, { "epoch": 0.73, "learning_rate": 4.210902166150916e-07, "logits/generated": -2.297677516937256, "logits/real": -2.2483174800872803, "logps/generated": -491.7379455566406, "logps/real": -358.0972900390625, "loss": 0.1178, "rewards/accuracies": 0.949999988079071, "rewards/generated": -11.660999298095703, "rewards/margins": 9.423800468444824, "rewards/real": -2.237199306488037, "step": 1130 }, { "epoch": 0.73, "learning_rate": 4.199000238038562e-07, "logits/generated": -2.337754249572754, "logits/real": -2.2701668739318848, "logps/generated": -540.6451416015625, "logps/real": -358.3714294433594, "loss": 0.0784, "rewards/accuracies": 0.9375, "rewards/generated": -12.581125259399414, "rewards/margins": 10.22091293334961, "rewards/real": -2.3602118492126465, "step": 1140 }, { "epoch": 0.74, "learning_rate": 4.187098309926208e-07, "logits/generated": -2.328687906265259, "logits/real": -2.2642407417297363, "logps/generated": -510.14031982421875, "logps/real": -395.0322570800781, "loss": 0.09, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.54482650756836, "rewards/margins": 9.52647590637207, "rewards/real": -2.0183498859405518, "step": 1150 }, { "epoch": 0.75, "learning_rate": 4.1751963818138534e-07, "logits/generated": -2.2651526927948, "logits/real": -2.2788243293762207, "logps/generated": -504.93603515625, "logps/real": -315.02191162109375, "loss": 0.1041, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -13.738065719604492, "rewards/margins": 10.608281135559082, "rewards/real": -3.129784345626831, "step": 1160 }, { "epoch": 0.75, "learning_rate": 4.1632944537015e-07, "logits/generated": -2.2317709922790527, "logits/real": -2.24200177192688, "logps/generated": -509.27069091796875, "logps/real": -339.88775634765625, "loss": 0.1091, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -13.493547439575195, "rewards/margins": 10.474954605102539, "rewards/real": -3.0185914039611816, "step": 1170 }, { "epoch": 0.76, "learning_rate": 4.151392525589145e-07, "logits/generated": -2.1863222122192383, "logits/real": -2.1978249549865723, "logps/generated": -579.1888427734375, "logps/real": -406.6951904296875, "loss": 0.0913, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.355679512023926, "rewards/margins": 11.128015518188477, "rewards/real": -3.227665424346924, "step": 1180 }, { "epoch": 0.76, "learning_rate": 4.139490597476791e-07, "logits/generated": -2.2855031490325928, "logits/real": -2.2533984184265137, "logps/generated": -515.9293823242188, "logps/real": -389.2561340332031, "loss": 0.098, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.194608688354492, "rewards/margins": 9.927949905395508, "rewards/real": -2.2666568756103516, "step": 1190 }, { "epoch": 0.77, "learning_rate": 4.127588669364437e-07, "logits/generated": -2.2684109210968018, "logits/real": -2.2653377056121826, "logps/generated": -492.51708984375, "logps/real": -367.3876953125, "loss": 0.0969, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.415742874145508, "rewards/margins": 8.819601058959961, "rewards/real": -3.5961413383483887, "step": 1200 }, { "epoch": 0.78, "learning_rate": 4.115686741252083e-07, "logits/generated": -2.3267722129821777, "logits/real": -2.3436107635498047, "logps/generated": -532.1981201171875, "logps/real": -420.6949157714844, "loss": 0.0925, "rewards/accuracies": 0.949999988079071, "rewards/generated": -11.921398162841797, "rewards/margins": 9.262883186340332, "rewards/real": -2.6585140228271484, "step": 1210 }, { "epoch": 0.78, "learning_rate": 4.103784813139728e-07, "logits/generated": -2.3338916301727295, "logits/real": -2.334585666656494, "logps/generated": -532.9281616210938, "logps/real": -377.1614990234375, "loss": 0.1003, "rewards/accuracies": 0.949999988079071, "rewards/generated": -12.63199234008789, "rewards/margins": 9.967310905456543, "rewards/real": -2.6646811962127686, "step": 1220 }, { "epoch": 0.79, "learning_rate": 4.091882885027374e-07, "logits/generated": -2.4137134552001953, "logits/real": -2.4191126823425293, "logps/generated": -543.4132080078125, "logps/real": -409.27313232421875, "loss": 0.0698, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -11.591083526611328, "rewards/margins": 9.39649772644043, "rewards/real": -2.1945860385894775, "step": 1230 }, { "epoch": 0.8, "learning_rate": 4.07998095691502e-07, "logits/generated": -2.4814438819885254, "logits/real": -2.460033893585205, "logps/generated": -558.88427734375, "logps/real": -361.6279296875, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/generated": -12.638830184936523, "rewards/margins": 10.296621322631836, "rewards/real": -2.342207670211792, "step": 1240 }, { "epoch": 0.8, "learning_rate": 4.0680790288026654e-07, "logits/generated": -2.347885847091675, "logits/real": -2.360043525695801, "logps/generated": -555.4683837890625, "logps/real": -371.6921081542969, "loss": 0.0769, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.670600891113281, "rewards/margins": 10.81347370147705, "rewards/real": -1.857126235961914, "step": 1250 }, { "epoch": 0.81, "learning_rate": 4.056177100690312e-07, "logits/generated": -2.285675048828125, "logits/real": -2.264207363128662, "logps/generated": -540.85546875, "logps/real": -306.2080078125, "loss": 0.0912, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.267425537109375, "rewards/margins": 10.254603385925293, "rewards/real": -3.0128207206726074, "step": 1260 }, { "epoch": 0.82, "learning_rate": 4.044275172577957e-07, "logits/generated": -2.256348133087158, "logits/real": -2.257491111755371, "logps/generated": -484.46746826171875, "logps/real": -345.3021240234375, "loss": 0.0676, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.33599853515625, "rewards/margins": 10.659614562988281, "rewards/real": -2.6763834953308105, "step": 1270 }, { "epoch": 0.82, "learning_rate": 4.0323732444656036e-07, "logits/generated": -2.310236930847168, "logits/real": -2.3040318489074707, "logps/generated": -652.2039794921875, "logps/real": -504.1841735839844, "loss": 0.0643, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -14.383448600769043, "rewards/margins": 11.50027084350586, "rewards/real": -2.8831779956817627, "step": 1280 }, { "epoch": 0.83, "learning_rate": 4.020471316353249e-07, "logits/generated": -2.356076717376709, "logits/real": -2.3591604232788086, "logps/generated": -550.447265625, "logps/real": -353.8184509277344, "loss": 0.0918, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -14.289446830749512, "rewards/margins": 11.606106758117676, "rewards/real": -2.6833410263061523, "step": 1290 }, { "epoch": 0.84, "learning_rate": 4.008569388240895e-07, "logits/generated": -2.360548257827759, "logits/real": -2.312885284423828, "logps/generated": -535.8861083984375, "logps/real": -367.6419372558594, "loss": 0.1156, "rewards/accuracies": 0.925000011920929, "rewards/generated": -13.502528190612793, "rewards/margins": 10.530913352966309, "rewards/real": -2.971615791320801, "step": 1300 }, { "epoch": 0.84, "learning_rate": 3.996667460128541e-07, "logits/generated": -2.3651747703552246, "logits/real": -2.385131359100342, "logps/generated": -576.5161743164062, "logps/real": -374.0749206542969, "loss": 0.0621, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.620333671569824, "rewards/margins": 11.384607315063477, "rewards/real": -3.2357261180877686, "step": 1310 }, { "epoch": 0.85, "learning_rate": 3.9847655320161867e-07, "logits/generated": -2.3751935958862305, "logits/real": -2.4091992378234863, "logps/generated": -528.142333984375, "logps/real": -353.9475402832031, "loss": 0.1308, "rewards/accuracies": 0.9375, "rewards/generated": -13.35066032409668, "rewards/margins": 11.197701454162598, "rewards/real": -2.1529600620269775, "step": 1320 }, { "epoch": 0.85, "learning_rate": 3.972863603903832e-07, "logits/generated": -2.3546595573425293, "logits/real": -2.3350775241851807, "logps/generated": -561.2341918945312, "logps/real": -414.78094482421875, "loss": 0.0393, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.160077095031738, "rewards/margins": 10.612679481506348, "rewards/real": -2.5473971366882324, "step": 1330 }, { "epoch": 0.86, "learning_rate": 3.9609616757914784e-07, "logits/generated": -2.3799915313720703, "logits/real": -2.4229447841644287, "logps/generated": -529.3641357421875, "logps/real": -367.49676513671875, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.449444770812988, "rewards/margins": 10.857105255126953, "rewards/real": -2.5923383235931396, "step": 1340 }, { "epoch": 0.87, "learning_rate": 3.949059747679124e-07, "logits/generated": -2.3573193550109863, "logits/real": -2.361992359161377, "logps/generated": -541.9324340820312, "logps/real": -408.26885986328125, "loss": 0.0703, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -13.906323432922363, "rewards/margins": 10.026899337768555, "rewards/real": -3.8794217109680176, "step": 1350 }, { "epoch": 0.87, "learning_rate": 3.9371578195667697e-07, "logits/generated": -2.3486838340759277, "logits/real": -2.3597350120544434, "logps/generated": -507.2032775878906, "logps/real": -343.7771301269531, "loss": 0.0878, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.98302936553955, "rewards/margins": 11.527512550354004, "rewards/real": -3.4555180072784424, "step": 1360 }, { "epoch": 0.88, "learning_rate": 3.9252558914544156e-07, "logits/generated": -2.473050594329834, "logits/real": -2.515122652053833, "logps/generated": -507.3802185058594, "logps/real": -396.953125, "loss": 0.0873, "rewards/accuracies": 0.9375, "rewards/generated": -13.604555130004883, "rewards/margins": 11.562819480895996, "rewards/real": -2.041734218597412, "step": 1370 }, { "epoch": 0.89, "learning_rate": 3.9133539633420615e-07, "logits/generated": -2.3934197425842285, "logits/real": -2.3536086082458496, "logps/generated": -532.2623901367188, "logps/real": -370.50146484375, "loss": 0.1024, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.107502937316895, "rewards/margins": 9.64056396484375, "rewards/real": -2.4669392108917236, "step": 1380 }, { "epoch": 0.89, "learning_rate": 3.901452035229707e-07, "logits/generated": -2.3607726097106934, "logits/real": -2.3698906898498535, "logps/generated": -516.7948608398438, "logps/real": -358.2063293457031, "loss": 0.0738, "rewards/accuracies": 0.949999988079071, "rewards/generated": -12.62265396118164, "rewards/margins": 11.326190948486328, "rewards/real": -1.2964636087417603, "step": 1390 }, { "epoch": 0.9, "learning_rate": 3.8895501071173533e-07, "logits/generated": -2.3465137481689453, "logits/real": -2.3854851722717285, "logps/generated": -521.5613403320312, "logps/real": -320.7744140625, "loss": 0.0709, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.96727180480957, "rewards/margins": 10.701366424560547, "rewards/real": -2.2659034729003906, "step": 1400 }, { "epoch": 0.91, "learning_rate": 3.8776481790049987e-07, "logits/generated": -2.39072847366333, "logits/real": -2.3972249031066895, "logps/generated": -533.3491821289062, "logps/real": -341.78558349609375, "loss": 0.0688, "rewards/accuracies": 0.949999988079071, "rewards/generated": -14.228726387023926, "rewards/margins": 12.107359886169434, "rewards/real": -2.1213667392730713, "step": 1410 }, { "epoch": 0.91, "learning_rate": 3.865746250892644e-07, "logits/generated": -2.2480862140655518, "logits/real": -2.340782642364502, "logps/generated": -566.9257202148438, "logps/real": -318.01495361328125, "loss": 0.0588, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -15.223588943481445, "rewards/margins": 12.4530029296875, "rewards/real": -2.77058482170105, "step": 1420 }, { "epoch": 0.92, "learning_rate": 3.8538443227802905e-07, "logits/generated": -2.261993169784546, "logits/real": -2.2912187576293945, "logps/generated": -497.12091064453125, "logps/real": -354.21795654296875, "loss": 0.1078, "rewards/accuracies": 0.949999988079071, "rewards/generated": -13.343725204467773, "rewards/margins": 10.44404411315918, "rewards/real": -2.8996803760528564, "step": 1430 }, { "epoch": 0.93, "learning_rate": 3.841942394667936e-07, "logits/generated": -2.333307981491089, "logits/real": -2.3561549186706543, "logps/generated": -516.3057250976562, "logps/real": -322.0869140625, "loss": 0.0865, "rewards/accuracies": 0.925000011920929, "rewards/generated": -13.671916007995605, "rewards/margins": 10.728243827819824, "rewards/real": -2.943671226501465, "step": 1440 }, { "epoch": 0.93, "learning_rate": 3.8300404665555817e-07, "logits/generated": -2.332399845123291, "logits/real": -2.285750389099121, "logps/generated": -566.4459228515625, "logps/real": -421.2608337402344, "loss": 0.068, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.901707649230957, "rewards/margins": 11.070039749145508, "rewards/real": -2.8316686153411865, "step": 1450 }, { "epoch": 0.94, "learning_rate": 3.8181385384432276e-07, "logits/generated": -2.232886791229248, "logits/real": -2.280543804168701, "logps/generated": -488.8804626464844, "logps/real": -350.1278991699219, "loss": 0.0911, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.198708534240723, "rewards/margins": 9.276993751525879, "rewards/real": -2.92171573638916, "step": 1460 }, { "epoch": 0.94, "learning_rate": 3.8062366103308735e-07, "logits/generated": -2.254783868789673, "logits/real": -2.3048110008239746, "logps/generated": -550.8900146484375, "logps/real": -392.82562255859375, "loss": 0.1241, "rewards/accuracies": 0.949999988079071, "rewards/generated": -13.891596794128418, "rewards/margins": 11.135417938232422, "rewards/real": -2.7561793327331543, "step": 1470 }, { "epoch": 0.95, "learning_rate": 3.794334682218519e-07, "logits/generated": -2.182992458343506, "logits/real": -2.2217135429382324, "logps/generated": -545.983154296875, "logps/real": -389.5903015136719, "loss": 0.0698, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.36902141571045, "rewards/margins": 11.656338691711426, "rewards/real": -2.712681770324707, "step": 1480 }, { "epoch": 0.96, "learning_rate": 3.7824327541061653e-07, "logits/generated": -2.213576555252075, "logits/real": -2.1897943019866943, "logps/generated": -545.77490234375, "logps/real": -393.1239013671875, "loss": 0.1254, "rewards/accuracies": 0.949999988079071, "rewards/generated": -13.385502815246582, "rewards/margins": 10.636938095092773, "rewards/real": -2.748565673828125, "step": 1490 }, { "epoch": 0.96, "learning_rate": 3.7705308259938107e-07, "logits/generated": -2.2783615589141846, "logits/real": -2.3009047508239746, "logps/generated": -476.43389892578125, "logps/real": -353.0292053222656, "loss": 0.0697, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.137995719909668, "rewards/margins": 9.004945755004883, "rewards/real": -3.1330504417419434, "step": 1500 }, { "epoch": 0.97, "learning_rate": 3.7586288978814566e-07, "logits/generated": -2.351835250854492, "logits/real": -2.3665931224823, "logps/generated": -560.801025390625, "logps/real": -415.95599365234375, "loss": 0.0882, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -13.19482707977295, "rewards/margins": 11.202047348022461, "rewards/real": -1.992780327796936, "step": 1510 }, { "epoch": 0.98, "learning_rate": 3.7467269697691025e-07, "logits/generated": -2.382901430130005, "logits/real": -2.393000841140747, "logps/generated": -505.42071533203125, "logps/real": -403.2711181640625, "loss": 0.1249, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -11.522603988647461, "rewards/margins": 9.553156852722168, "rewards/real": -1.9694464206695557, "step": 1520 }, { "epoch": 0.98, "learning_rate": 3.7348250416567484e-07, "logits/generated": -2.3661327362060547, "logits/real": -2.366842746734619, "logps/generated": -558.5916748046875, "logps/real": -373.6806640625, "loss": 0.08, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -11.855193138122559, "rewards/margins": 10.633702278137207, "rewards/real": -1.2214914560317993, "step": 1530 }, { "epoch": 0.99, "learning_rate": 3.722923113544394e-07, "logits/generated": -2.339836597442627, "logits/real": -2.34541654586792, "logps/generated": -536.8855590820312, "logps/real": -369.8499450683594, "loss": 0.0723, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.604657173156738, "rewards/margins": 10.141790390014648, "rewards/real": -1.462865948677063, "step": 1540 }, { "epoch": 1.0, "learning_rate": 3.71102118543204e-07, "logits/generated": -2.3748319149017334, "logits/real": -2.347130298614502, "logps/generated": -509.50506591796875, "logps/real": -383.81207275390625, "loss": 0.1114, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.870028495788574, "rewards/margins": 9.974245071411133, "rewards/real": -0.8957852125167847, "step": 1550 }, { "epoch": 1.0, "learning_rate": 3.6991192573196855e-07, "logits/generated": -2.3040242195129395, "logits/real": -2.3185038566589355, "logps/generated": -511.0401916503906, "logps/real": -363.1226501464844, "loss": 0.0345, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.427467346191406, "rewards/margins": 11.945287704467773, "rewards/real": -1.4821794033050537, "step": 1560 }, { "epoch": 1.01, "learning_rate": 3.6872173292073314e-07, "logits/generated": -2.3647196292877197, "logits/real": -2.3546738624572754, "logps/generated": -562.1310424804688, "logps/real": -375.43316650390625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/generated": -14.149296760559082, "rewards/margins": 12.207903861999512, "rewards/real": -1.9413917064666748, "step": 1570 }, { "epoch": 1.02, "learning_rate": 3.6753154010949773e-07, "logits/generated": -2.301741123199463, "logits/real": -2.3652164936065674, "logps/generated": -549.0155639648438, "logps/real": -403.6900329589844, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -15.563409805297852, "rewards/margins": 14.276460647583008, "rewards/real": -1.2869514226913452, "step": 1580 }, { "epoch": 1.02, "learning_rate": 3.663413472982623e-07, "logits/generated": -2.261230707168579, "logits/real": -2.3311500549316406, "logps/generated": -563.3414916992188, "logps/real": -348.5225524902344, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/generated": -15.899667739868164, "rewards/margins": 13.73414421081543, "rewards/real": -2.1655211448669434, "step": 1590 }, { "epoch": 1.03, "learning_rate": 3.6515115448702686e-07, "logits/generated": -2.372976779937744, "logits/real": -2.400886058807373, "logps/generated": -569.90673828125, "logps/real": -390.90704345703125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -14.986291885375977, "rewards/margins": 13.204297065734863, "rewards/real": -1.7819948196411133, "step": 1600 }, { "epoch": 1.03, "learning_rate": 3.639609616757915e-07, "logits/generated": -2.2624919414520264, "logits/real": -2.3082499504089355, "logps/generated": -564.4075317382812, "logps/real": -394.94287109375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/generated": -15.597874641418457, "rewards/margins": 13.840059280395508, "rewards/real": -1.7578150033950806, "step": 1610 }, { "epoch": 1.04, "learning_rate": 3.6277076886455604e-07, "logits/generated": -2.1787052154541016, "logits/real": -2.215146780014038, "logps/generated": -520.8050537109375, "logps/real": -337.3519287109375, "loss": 0.0162, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.893610954284668, "rewards/margins": 13.641830444335938, "rewards/real": -2.2517800331115723, "step": 1620 }, { "epoch": 1.05, "learning_rate": 3.615805760533206e-07, "logits/generated": -2.227945566177368, "logits/real": -2.295309543609619, "logps/generated": -569.5865478515625, "logps/real": -361.73980712890625, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/generated": -16.241802215576172, "rewards/margins": 14.572728157043457, "rewards/real": -1.669075608253479, "step": 1630 }, { "epoch": 1.05, "learning_rate": 3.603903832420852e-07, "logits/generated": -2.281085968017578, "logits/real": -2.32185697555542, "logps/generated": -550.8215942382812, "logps/real": -361.8749694824219, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/generated": -15.377789497375488, "rewards/margins": 12.968729972839355, "rewards/real": -2.4090590476989746, "step": 1640 }, { "epoch": 1.06, "learning_rate": 3.5920019043084976e-07, "logits/generated": -2.36209774017334, "logits/real": -2.285122871398926, "logps/generated": -528.7139892578125, "logps/real": -321.8282775878906, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -15.81471061706543, "rewards/margins": 13.131657600402832, "rewards/real": -2.683054208755493, "step": 1650 }, { "epoch": 1.07, "learning_rate": 3.5800999761961435e-07, "logits/generated": -2.2304556369781494, "logits/real": -2.2633605003356934, "logps/generated": -569.7330932617188, "logps/real": -362.989501953125, "loss": 0.0132, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.337574005126953, "rewards/margins": 14.290669441223145, "rewards/real": -2.046905994415283, "step": 1660 }, { "epoch": 1.07, "learning_rate": 3.5681980480837893e-07, "logits/generated": -2.1861634254455566, "logits/real": -2.213642120361328, "logps/generated": -559.1907348632812, "logps/real": -386.95330810546875, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.114718437194824, "rewards/margins": 12.973544120788574, "rewards/real": -2.1411757469177246, "step": 1670 }, { "epoch": 1.08, "learning_rate": 3.556296119971435e-07, "logits/generated": -2.160221815109253, "logits/real": -2.219512701034546, "logps/generated": -548.2822265625, "logps/real": -373.2680358886719, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -17.96226692199707, "rewards/margins": 15.323366165161133, "rewards/real": -2.6389002799987793, "step": 1680 }, { "epoch": 1.09, "learning_rate": 3.5443941918590806e-07, "logits/generated": -2.237609386444092, "logits/real": -2.2286012172698975, "logps/generated": -572.4905395507812, "logps/real": -334.9125061035156, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -18.150920867919922, "rewards/margins": 14.536825180053711, "rewards/real": -3.614098072052002, "step": 1690 }, { "epoch": 1.09, "learning_rate": 3.532492263746727e-07, "logits/generated": -2.184624195098877, "logits/real": -2.211127281188965, "logps/generated": -608.9227905273438, "logps/real": -361.32684326171875, "loss": 0.0088, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.036636352539062, "rewards/margins": 16.421756744384766, "rewards/real": -2.6148791313171387, "step": 1700 }, { "epoch": 1.1, "learning_rate": 3.5205903356343724e-07, "logits/generated": -2.1313374042510986, "logits/real": -2.173050880432129, "logps/generated": -598.5973510742188, "logps/real": -355.4421081542969, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/generated": -20.13778305053711, "rewards/margins": 16.43929672241211, "rewards/real": -3.6984870433807373, "step": 1710 }, { "epoch": 1.11, "learning_rate": 3.5086884075220183e-07, "logits/generated": -2.087921619415283, "logits/real": -2.1425328254699707, "logps/generated": -566.9527587890625, "logps/real": -332.5347595214844, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -19.658199310302734, "rewards/margins": 15.989227294921875, "rewards/real": -3.6689727306365967, "step": 1720 }, { "epoch": 1.11, "learning_rate": 3.496786479409664e-07, "logits/generated": -2.07328200340271, "logits/real": -2.1263468265533447, "logps/generated": -567.5440063476562, "logps/real": -416.94256591796875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -19.27109146118164, "rewards/margins": 15.73029899597168, "rewards/real": -3.540794849395752, "step": 1730 }, { "epoch": 1.12, "learning_rate": 3.48488455129731e-07, "logits/generated": -2.0371181964874268, "logits/real": -2.082137107849121, "logps/generated": -556.2427978515625, "logps/real": -303.4817810058594, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.692432403564453, "rewards/margins": 16.322385787963867, "rewards/real": -3.3700478076934814, "step": 1740 }, { "epoch": 1.12, "learning_rate": 3.4729826231849555e-07, "logits/generated": -2.097503423690796, "logits/real": -2.1778554916381836, "logps/generated": -610.5614013671875, "logps/real": -399.98358154296875, "loss": 0.0139, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.528287887573242, "rewards/margins": 15.74824047088623, "rewards/real": -3.780047655105591, "step": 1750 }, { "epoch": 1.13, "learning_rate": 3.461080695072602e-07, "logits/generated": -2.0831446647644043, "logits/real": -2.2034473419189453, "logps/generated": -588.8594970703125, "logps/real": -398.7830810546875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/generated": -17.589412689208984, "rewards/margins": 16.229389190673828, "rewards/real": -1.360022783279419, "step": 1760 }, { "epoch": 1.14, "learning_rate": 3.449178766960247e-07, "logits/generated": -2.1205058097839355, "logits/real": -2.1790976524353027, "logps/generated": -588.9290771484375, "logps/real": -380.9736633300781, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -17.767770767211914, "rewards/margins": 14.602663040161133, "rewards/real": -3.1651082038879395, "step": 1770 }, { "epoch": 1.14, "learning_rate": 3.4372768388478937e-07, "logits/generated": -2.1685116291046143, "logits/real": -2.245673656463623, "logps/generated": -547.3893432617188, "logps/real": -357.3170471191406, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/generated": -16.38484001159668, "rewards/margins": 14.403864860534668, "rewards/real": -1.9809764623641968, "step": 1780 }, { "epoch": 1.15, "learning_rate": 3.425374910735539e-07, "logits/generated": -2.249411106109619, "logits/real": -2.279759645462036, "logps/generated": -618.198486328125, "logps/real": -434.0210876464844, "loss": 0.0164, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.497578620910645, "rewards/margins": 12.881543159484863, "rewards/real": -2.616034984588623, "step": 1790 }, { "epoch": 1.16, "learning_rate": 3.413472982623185e-07, "logits/generated": -2.3159961700439453, "logits/real": -2.294581651687622, "logps/generated": -537.07861328125, "logps/real": -337.27874755859375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -17.291156768798828, "rewards/margins": 15.182905197143555, "rewards/real": -2.1082491874694824, "step": 1800 }, { "epoch": 1.16, "learning_rate": 3.401571054510831e-07, "logits/generated": -2.2418415546417236, "logits/real": -2.266832113265991, "logps/generated": -598.0546264648438, "logps/real": -402.66510009765625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/generated": -16.92933464050293, "rewards/margins": 14.629178047180176, "rewards/real": -2.300158739089966, "step": 1810 }, { "epoch": 1.17, "learning_rate": 3.389669126398476e-07, "logits/generated": -2.200573205947876, "logits/real": -2.224865436553955, "logps/generated": -546.8048706054688, "logps/real": -372.92010498046875, "loss": 0.0072, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.779863357543945, "rewards/margins": 15.325950622558594, "rewards/real": -3.4539108276367188, "step": 1820 }, { "epoch": 1.18, "learning_rate": 3.377767198286122e-07, "logits/generated": -2.172501802444458, "logits/real": -2.224087953567505, "logps/generated": -577.8551025390625, "logps/real": -396.276123046875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/generated": -19.138914108276367, "rewards/margins": 15.50109577178955, "rewards/real": -3.6378180980682373, "step": 1830 }, { "epoch": 1.18, "learning_rate": 3.365865270173768e-07, "logits/generated": -2.222236394882202, "logits/real": -2.215315341949463, "logps/generated": -606.78955078125, "logps/real": -401.0457763671875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/generated": -18.88642120361328, "rewards/margins": 16.262500762939453, "rewards/real": -2.6239190101623535, "step": 1840 }, { "epoch": 1.19, "learning_rate": 3.353963342061414e-07, "logits/generated": -2.0716452598571777, "logits/real": -2.118318796157837, "logps/generated": -649.42431640625, "logps/real": -411.53338623046875, "loss": 0.0088, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.20388412475586, "rewards/margins": 17.11600112915039, "rewards/real": -3.0878825187683105, "step": 1850 }, { "epoch": 1.2, "learning_rate": 3.3420614139490593e-07, "logits/generated": -2.0571255683898926, "logits/real": -2.0999083518981934, "logps/generated": -555.8407592773438, "logps/real": -349.90155029296875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -18.651325225830078, "rewards/margins": 15.208650588989258, "rewards/real": -3.442675828933716, "step": 1860 }, { "epoch": 1.2, "learning_rate": 3.3301594858367057e-07, "logits/generated": -2.0144572257995605, "logits/real": -2.0585360527038574, "logps/generated": -564.4594116210938, "logps/real": -357.2227478027344, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -20.891742706298828, "rewards/margins": 17.29900550842285, "rewards/real": -3.5927371978759766, "step": 1870 }, { "epoch": 1.21, "learning_rate": 3.318257557724351e-07, "logits/generated": -2.096843957901001, "logits/real": -2.2019436359405518, "logps/generated": -579.1746826171875, "logps/real": -376.2278137207031, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/generated": -20.490825653076172, "rewards/margins": 15.950909614562988, "rewards/real": -4.539917945861816, "step": 1880 }, { "epoch": 1.21, "learning_rate": 3.306355629611997e-07, "logits/generated": -2.2774910926818848, "logits/real": -2.312415599822998, "logps/generated": -589.5444946289062, "logps/real": -355.139404296875, "loss": 0.012, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.514019012451172, "rewards/margins": 14.974294662475586, "rewards/real": -2.5397236347198486, "step": 1890 }, { "epoch": 1.22, "learning_rate": 3.294453701499643e-07, "logits/generated": -2.2411015033721924, "logits/real": -2.2287962436676025, "logps/generated": -519.8386840820312, "logps/real": -335.5692443847656, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/generated": -16.851512908935547, "rewards/margins": 13.842289924621582, "rewards/real": -3.0092215538024902, "step": 1900 }, { "epoch": 1.23, "learning_rate": 3.282551773387289e-07, "logits/generated": -2.2905209064483643, "logits/real": -2.3136816024780273, "logps/generated": -624.5968017578125, "logps/real": -403.45501708984375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/generated": -17.708858489990234, "rewards/margins": 15.356656074523926, "rewards/real": -2.352205276489258, "step": 1910 }, { "epoch": 1.23, "learning_rate": 3.270649845274934e-07, "logits/generated": -2.268571376800537, "logits/real": -2.270794630050659, "logps/generated": -538.6395263671875, "logps/real": -346.6217346191406, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/generated": -12.539231300354004, "rewards/margins": 12.0559720993042, "rewards/real": -0.4832596778869629, "step": 1920 }, { "epoch": 1.24, "learning_rate": 3.2587479171625806e-07, "logits/generated": -2.176980495452881, "logits/real": -2.2080063819885254, "logps/generated": -514.9129638671875, "logps/real": -330.4537048339844, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -15.298626899719238, "rewards/margins": 13.659899711608887, "rewards/real": -1.6387275457382202, "step": 1930 }, { "epoch": 1.25, "learning_rate": 3.246845989050226e-07, "logits/generated": -2.1879403591156006, "logits/real": -2.1978180408477783, "logps/generated": -593.1455078125, "logps/real": -375.1646423339844, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/generated": -17.365800857543945, "rewards/margins": 15.398488998413086, "rewards/real": -1.967309594154358, "step": 1940 }, { "epoch": 1.25, "learning_rate": 3.234944060937872e-07, "logits/generated": -2.192488193511963, "logits/real": -2.1515822410583496, "logps/generated": -523.1942138671875, "logps/real": -323.0399475097656, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/generated": -17.100679397583008, "rewards/margins": 14.689547538757324, "rewards/real": -2.4111287593841553, "step": 1950 }, { "epoch": 1.26, "learning_rate": 3.2230421328255177e-07, "logits/generated": -2.1857666969299316, "logits/real": -2.1973021030426025, "logps/generated": -573.9412841796875, "logps/real": -383.3498840332031, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -15.325065612792969, "rewards/margins": 13.42688274383545, "rewards/real": -1.8981819152832031, "step": 1960 }, { "epoch": 1.27, "learning_rate": 3.2111402047131636e-07, "logits/generated": -2.1818251609802246, "logits/real": -2.138362169265747, "logps/generated": -582.6113891601562, "logps/real": -393.2650146484375, "loss": 0.0099, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.432369232177734, "rewards/margins": 14.267339706420898, "rewards/real": -3.1650280952453613, "step": 1970 }, { "epoch": 1.27, "learning_rate": 3.199238276600809e-07, "logits/generated": -2.1671457290649414, "logits/real": -2.165565013885498, "logps/generated": -562.9454345703125, "logps/real": -394.97747802734375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -17.854740142822266, "rewards/margins": 14.695347785949707, "rewards/real": -3.159393072128296, "step": 1980 }, { "epoch": 1.28, "learning_rate": 3.1873363484884554e-07, "logits/generated": -2.1330654621124268, "logits/real": -2.1521830558776855, "logps/generated": -639.1904296875, "logps/real": -405.31842041015625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -20.329586029052734, "rewards/margins": 17.111438751220703, "rewards/real": -3.218146800994873, "step": 1990 }, { "epoch": 1.29, "learning_rate": 3.175434420376101e-07, "logits/generated": -2.189150333404541, "logits/real": -2.256707191467285, "logps/generated": -564.0371704101562, "logps/real": -355.9576110839844, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -19.11702537536621, "rewards/margins": 15.930302619934082, "rewards/real": -3.186723232269287, "step": 2000 }, { "epoch": 1.29, "learning_rate": 3.1635324922637467e-07, "logits/generated": -2.059345245361328, "logits/real": -2.0931825637817383, "logps/generated": -592.4450073242188, "logps/real": -353.2586975097656, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -19.638408660888672, "rewards/margins": 15.883349418640137, "rewards/real": -3.7550551891326904, "step": 2010 }, { "epoch": 1.3, "learning_rate": 3.1516305641513926e-07, "logits/generated": -2.092940092086792, "logits/real": -2.1705400943756104, "logps/generated": -628.2099609375, "logps/real": -390.40728759765625, "loss": 0.0169, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.64360237121582, "rewards/margins": 16.640026092529297, "rewards/real": -4.003575801849365, "step": 2020 }, { "epoch": 1.3, "learning_rate": 3.139728636039038e-07, "logits/generated": -1.9676166772842407, "logits/real": -2.0171456336975098, "logps/generated": -546.7244873046875, "logps/real": -362.56488037109375, "loss": 0.0139, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.221649169921875, "rewards/margins": 15.068666458129883, "rewards/real": -4.152983665466309, "step": 2030 }, { "epoch": 1.31, "learning_rate": 3.127826707926684e-07, "logits/generated": -2.1424899101257324, "logits/real": -2.1512458324432373, "logps/generated": -566.885986328125, "logps/real": -319.979736328125, "loss": 0.0091, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.882003784179688, "rewards/margins": 15.76966667175293, "rewards/real": -3.112335205078125, "step": 2040 }, { "epoch": 1.32, "learning_rate": 3.11592477981433e-07, "logits/generated": -2.140007495880127, "logits/real": -2.138995409011841, "logps/generated": -605.3294677734375, "logps/real": -381.41290283203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -18.86813735961914, "rewards/margins": 14.63634967803955, "rewards/real": -4.231788158416748, "step": 2050 }, { "epoch": 1.32, "learning_rate": 3.1040228517019756e-07, "logits/generated": -2.1069555282592773, "logits/real": -2.0523293018341064, "logps/generated": -636.1548461914062, "logps/real": -395.80914306640625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/generated": -21.208894729614258, "rewards/margins": 18.558944702148438, "rewards/real": -2.649949550628662, "step": 2060 }, { "epoch": 1.33, "learning_rate": 3.092120923589621e-07, "logits/generated": -2.1070761680603027, "logits/real": -2.1584606170654297, "logps/generated": -615.450439453125, "logps/real": -398.21844482421875, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/generated": -19.43939208984375, "rewards/margins": 14.954734802246094, "rewards/real": -4.484656810760498, "step": 2070 }, { "epoch": 1.34, "learning_rate": 3.0802189954772674e-07, "logits/generated": -2.0256807804107666, "logits/real": -2.057490825653076, "logps/generated": -540.1599731445312, "logps/real": -362.2354431152344, "loss": 0.0277, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.157804489135742, "rewards/margins": 14.374547004699707, "rewards/real": -3.7832565307617188, "step": 2080 }, { "epoch": 1.34, "learning_rate": 3.068317067364913e-07, "logits/generated": -2.0899441242218018, "logits/real": -2.1068785190582275, "logps/generated": -626.4033813476562, "logps/real": -453.5624084472656, "loss": 0.0165, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.779939651489258, "rewards/margins": 14.692364692687988, "rewards/real": -3.0875754356384277, "step": 2090 }, { "epoch": 1.35, "learning_rate": 3.0564151392525587e-07, "logits/generated": -1.9991929531097412, "logits/real": -2.0575671195983887, "logps/generated": -572.4451904296875, "logps/real": -387.9151306152344, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/generated": -18.603885650634766, "rewards/margins": 14.808444023132324, "rewards/real": -3.7954421043395996, "step": 2100 }, { "epoch": 1.36, "learning_rate": 3.0445132111402046e-07, "logits/generated": -1.957606315612793, "logits/real": -2.0042033195495605, "logps/generated": -491.47344970703125, "logps/real": -340.8980712890625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -16.086580276489258, "rewards/margins": 13.114072799682617, "rewards/real": -2.9725046157836914, "step": 2110 }, { "epoch": 1.36, "learning_rate": 3.0326112830278505e-07, "logits/generated": -1.9950910806655884, "logits/real": -2.022733211517334, "logps/generated": -572.0457763671875, "logps/real": -401.0966796875, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/generated": -18.224430084228516, "rewards/margins": 15.097747802734375, "rewards/real": -3.126683473587036, "step": 2120 }, { "epoch": 1.37, "learning_rate": 3.020709354915496e-07, "logits/generated": -1.8846557140350342, "logits/real": -2.030510187149048, "logps/generated": -571.71337890625, "logps/real": -432.107421875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -17.2412166595459, "rewards/margins": 14.449191093444824, "rewards/real": -2.7920241355895996, "step": 2130 }, { "epoch": 1.38, "learning_rate": 3.0088074268031423e-07, "logits/generated": -1.9635789394378662, "logits/real": -2.010420322418213, "logps/generated": -595.8865966796875, "logps/real": -395.47271728515625, "loss": 0.0165, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -16.31340217590332, "rewards/margins": 13.558954238891602, "rewards/real": -2.754448175430298, "step": 2140 }, { "epoch": 1.38, "learning_rate": 2.9969054986907876e-07, "logits/generated": -1.8813692331314087, "logits/real": -1.973508596420288, "logps/generated": -504.12823486328125, "logps/real": -347.0367736816406, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -16.611045837402344, "rewards/margins": 14.725499153137207, "rewards/real": -1.8855485916137695, "step": 2150 }, { "epoch": 1.39, "learning_rate": 2.9850035705784335e-07, "logits/generated": -1.9549331665039062, "logits/real": -2.0346851348876953, "logps/generated": -609.5227661132812, "logps/real": -419.77618408203125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/generated": -17.863967895507812, "rewards/margins": 14.792566299438477, "rewards/real": -3.071401357650757, "step": 2160 }, { "epoch": 1.39, "learning_rate": 2.9731016424660794e-07, "logits/generated": -1.9871995449066162, "logits/real": -1.9926321506500244, "logps/generated": -571.4608764648438, "logps/real": -346.6838684082031, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/generated": -16.64493751525879, "rewards/margins": 14.440625190734863, "rewards/real": -2.2043120861053467, "step": 2170 }, { "epoch": 1.4, "learning_rate": 2.9611997143537253e-07, "logits/generated": -1.995072603225708, "logits/real": -1.9926702976226807, "logps/generated": -639.6512451171875, "logps/real": -369.5633239746094, "loss": 0.0106, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.795745849609375, "rewards/margins": 15.904996871948242, "rewards/real": -2.890749454498291, "step": 2180 }, { "epoch": 1.41, "learning_rate": 2.9492977862413707e-07, "logits/generated": -2.0281620025634766, "logits/real": -1.9819482564926147, "logps/generated": -570.8590698242188, "logps/real": -378.61474609375, "loss": 0.0159, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -17.853015899658203, "rewards/margins": 13.683568000793457, "rewards/real": -4.169447898864746, "step": 2190 }, { "epoch": 1.41, "learning_rate": 2.937395858129017e-07, "logits/generated": -1.9663385152816772, "logits/real": -2.0112898349761963, "logps/generated": -612.8561401367188, "logps/real": -383.02532958984375, "loss": 0.017, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.021343231201172, "rewards/margins": 16.094024658203125, "rewards/real": -2.927319288253784, "step": 2200 }, { "epoch": 1.42, "learning_rate": 2.9254939300166625e-07, "logits/generated": -1.9601085186004639, "logits/real": -2.0084640979766846, "logps/generated": -595.7770385742188, "logps/real": -402.1640319824219, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -18.758970260620117, "rewards/margins": 15.546884536743164, "rewards/real": -3.2120864391326904, "step": 2210 }, { "epoch": 1.43, "learning_rate": 2.913592001904308e-07, "logits/generated": -1.959242820739746, "logits/real": -2.011341094970703, "logps/generated": -582.9662475585938, "logps/real": -326.1315002441406, "loss": 0.0066, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.912372589111328, "rewards/margins": 15.57469654083252, "rewards/real": -3.3376784324645996, "step": 2220 }, { "epoch": 1.43, "learning_rate": 2.9016900737919543e-07, "logits/generated": -1.8837999105453491, "logits/real": -2.0170726776123047, "logps/generated": -615.4156494140625, "logps/real": -384.8844299316406, "loss": 0.008, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.660049438476562, "rewards/margins": 15.770927429199219, "rewards/real": -2.8891234397888184, "step": 2230 }, { "epoch": 1.44, "learning_rate": 2.8897881456795997e-07, "logits/generated": -1.8241097927093506, "logits/real": -1.7869752645492554, "logps/generated": -593.7149047851562, "logps/real": -370.52789306640625, "loss": 0.0204, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.744731903076172, "rewards/margins": 15.868573188781738, "rewards/real": -2.8761584758758545, "step": 2240 }, { "epoch": 1.45, "learning_rate": 2.8778862175672456e-07, "logits/generated": -1.7767345905303955, "logits/real": -1.8023865222930908, "logps/generated": -588.9293823242188, "logps/real": -392.6181640625, "loss": 0.0119, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.669166564941406, "rewards/margins": 16.322322845458984, "rewards/real": -2.346843957901001, "step": 2250 }, { "epoch": 1.45, "learning_rate": 2.8659842894548915e-07, "logits/generated": -1.7413629293441772, "logits/real": -1.7947721481323242, "logps/generated": -632.8668212890625, "logps/real": -339.634765625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -21.01340103149414, "rewards/margins": 17.00638771057129, "rewards/real": -4.007015228271484, "step": 2260 }, { "epoch": 1.46, "learning_rate": 2.8540823613425374e-07, "logits/generated": -1.7241382598876953, "logits/real": -1.8351795673370361, "logps/generated": -553.7213134765625, "logps/real": -371.4381408691406, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -18.440507888793945, "rewards/margins": 13.914960861206055, "rewards/real": -4.525545597076416, "step": 2270 }, { "epoch": 1.47, "learning_rate": 2.8421804332301827e-07, "logits/generated": -1.7572247982025146, "logits/real": -1.8531087636947632, "logps/generated": -680.1027221679688, "logps/real": -414.3067321777344, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/generated": -21.988712310791016, "rewards/margins": 17.40224266052246, "rewards/real": -4.586469650268555, "step": 2280 }, { "epoch": 1.47, "learning_rate": 2.830278505117829e-07, "logits/generated": -1.7346735000610352, "logits/real": -1.8323335647583008, "logps/generated": -684.58642578125, "logps/real": -440.241943359375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -23.51043128967285, "rewards/margins": 17.93229103088379, "rewards/real": -5.57813835144043, "step": 2290 }, { "epoch": 1.48, "learning_rate": 2.8183765770054745e-07, "logits/generated": -1.6866531372070312, "logits/real": -1.7377674579620361, "logps/generated": -626.2985229492188, "logps/real": -398.7217102050781, "loss": 0.0209, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.974733352661133, "rewards/margins": 15.526639938354492, "rewards/real": -5.448093414306641, "step": 2300 }, { "epoch": 1.48, "learning_rate": 2.806474648893121e-07, "logits/generated": -1.7734663486480713, "logits/real": -1.852805733680725, "logps/generated": -599.9769897460938, "logps/real": -384.3834228515625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -23.156208038330078, "rewards/margins": 17.898412704467773, "rewards/real": -5.257795810699463, "step": 2310 }, { "epoch": 1.49, "learning_rate": 2.7945727207807663e-07, "logits/generated": -1.704843282699585, "logits/real": -1.8438117504119873, "logps/generated": -671.08935546875, "logps/real": -363.78399658203125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -23.627620697021484, "rewards/margins": 18.943744659423828, "rewards/real": -4.6838765144348145, "step": 2320 }, { "epoch": 1.5, "learning_rate": 2.782670792668412e-07, "logits/generated": -1.8488250970840454, "logits/real": -1.937170386314392, "logps/generated": -617.1664428710938, "logps/real": -413.4002990722656, "loss": 0.0137, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.6809024810791, "rewards/margins": 16.090904235839844, "rewards/real": -5.5899977684021, "step": 2330 }, { "epoch": 1.5, "learning_rate": 2.770768864556058e-07, "logits/generated": -1.7172123193740845, "logits/real": -1.8333232402801514, "logps/generated": -608.66943359375, "logps/real": -358.6962890625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/generated": -22.130813598632812, "rewards/margins": 17.398597717285156, "rewards/real": -4.732216835021973, "step": 2340 }, { "epoch": 1.51, "learning_rate": 2.758866936443704e-07, "logits/generated": -1.7257283926010132, "logits/real": -1.8596134185791016, "logps/generated": -620.2059936523438, "logps/real": -376.4665222167969, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -22.570621490478516, "rewards/margins": 18.450403213500977, "rewards/real": -4.120217323303223, "step": 2350 }, { "epoch": 1.52, "learning_rate": 2.7469650083313494e-07, "logits/generated": -1.8226665258407593, "logits/real": -1.9213998317718506, "logps/generated": -523.0108642578125, "logps/real": -348.8974609375, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/generated": -18.249343872070312, "rewards/margins": 14.304719924926758, "rewards/real": -3.9446239471435547, "step": 2360 }, { "epoch": 1.52, "learning_rate": 2.735063080218996e-07, "logits/generated": -1.7809902429580688, "logits/real": -1.861524224281311, "logps/generated": -596.6590576171875, "logps/real": -363.1610107421875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -19.3269100189209, "rewards/margins": 17.15434455871582, "rewards/real": -2.1725666522979736, "step": 2370 }, { "epoch": 1.53, "learning_rate": 2.723161152106641e-07, "logits/generated": -1.6831693649291992, "logits/real": -1.7770426273345947, "logps/generated": -556.0060424804688, "logps/real": -342.6446838378906, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/generated": -19.728736877441406, "rewards/margins": 15.71430492401123, "rewards/real": -4.01443338394165, "step": 2380 }, { "epoch": 1.54, "learning_rate": 2.711259223994287e-07, "logits/generated": -1.7193806171417236, "logits/real": -1.7846571207046509, "logps/generated": -513.1950073242188, "logps/real": -321.84466552734375, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/generated": -18.99886131286621, "rewards/margins": 14.975049018859863, "rewards/real": -4.023811340332031, "step": 2390 }, { "epoch": 1.54, "learning_rate": 2.699357295881933e-07, "logits/generated": -1.696411371231079, "logits/real": -1.8342005014419556, "logps/generated": -525.8536376953125, "logps/real": -306.38720703125, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/generated": -17.83527374267578, "rewards/margins": 13.64979076385498, "rewards/real": -4.185482978820801, "step": 2400 }, { "epoch": 1.55, "learning_rate": 2.687455367769579e-07, "logits/generated": -1.777260184288025, "logits/real": -1.8473188877105713, "logps/generated": -579.9354248046875, "logps/real": -359.2183532714844, "loss": 0.0358, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.67335319519043, "rewards/margins": 13.895036697387695, "rewards/real": -2.77831768989563, "step": 2410 }, { "epoch": 1.56, "learning_rate": 2.675553439657224e-07, "logits/generated": -1.7697776556015015, "logits/real": -1.8199243545532227, "logps/generated": -516.1290893554688, "logps/real": -342.9108581542969, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -15.577775955200195, "rewards/margins": 13.427032470703125, "rewards/real": -2.150745391845703, "step": 2420 }, { "epoch": 1.56, "learning_rate": 2.66365151154487e-07, "logits/generated": -1.768214464187622, "logits/real": -1.8642857074737549, "logps/generated": -514.9273681640625, "logps/real": -360.794677734375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -16.164379119873047, "rewards/margins": 13.772100448608398, "rewards/real": -2.3922770023345947, "step": 2430 }, { "epoch": 1.57, "learning_rate": 2.651749583432516e-07, "logits/generated": -1.8366386890411377, "logits/real": -1.8158714771270752, "logps/generated": -596.3200073242188, "logps/real": -362.0704650878906, "loss": 0.0089, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.17621421813965, "rewards/margins": 14.673547744750977, "rewards/real": -2.5026676654815674, "step": 2440 }, { "epoch": 1.57, "learning_rate": 2.6398476553201614e-07, "logits/generated": -1.715486764907837, "logits/real": -1.7608009576797485, "logps/generated": -592.689208984375, "logps/real": -394.7555236816406, "loss": 0.0177, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.238231658935547, "rewards/margins": 15.920003890991211, "rewards/real": -2.318227767944336, "step": 2450 }, { "epoch": 1.58, "learning_rate": 2.627945727207808e-07, "logits/generated": -1.8307558298110962, "logits/real": -1.8600902557373047, "logps/generated": -560.46142578125, "logps/real": -370.09747314453125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/generated": -18.495569229125977, "rewards/margins": 14.71654987335205, "rewards/real": -3.7790215015411377, "step": 2460 }, { "epoch": 1.59, "learning_rate": 2.616043799095453e-07, "logits/generated": -1.7307708263397217, "logits/real": -1.7592779397964478, "logps/generated": -584.9305419921875, "logps/real": -359.0640869140625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -19.275348663330078, "rewards/margins": 14.876652717590332, "rewards/real": -4.398694038391113, "step": 2470 }, { "epoch": 1.59, "learning_rate": 2.604141870983099e-07, "logits/generated": -1.702275037765503, "logits/real": -1.7664591073989868, "logps/generated": -556.9425048828125, "logps/real": -366.8645935058594, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -19.035137176513672, "rewards/margins": 15.076634407043457, "rewards/real": -3.9585037231445312, "step": 2480 }, { "epoch": 1.6, "learning_rate": 2.592239942870745e-07, "logits/generated": -1.8040691614151, "logits/real": -1.831241250038147, "logps/generated": -605.1649780273438, "logps/real": -383.25506591796875, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/generated": -18.789392471313477, "rewards/margins": 15.085497856140137, "rewards/real": -3.7038941383361816, "step": 2490 }, { "epoch": 1.61, "learning_rate": 2.580338014758391e-07, "logits/generated": -1.9082372188568115, "logits/real": -1.964238166809082, "logps/generated": -597.7811889648438, "logps/real": -401.4377746582031, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -18.443161010742188, "rewards/margins": 15.85346794128418, "rewards/real": -2.589694023132324, "step": 2500 }, { "epoch": 1.61, "learning_rate": 2.568436086646036e-07, "logits/generated": -1.999436616897583, "logits/real": -1.951202154159546, "logps/generated": -578.76806640625, "logps/real": -361.1490478515625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/generated": -17.621252059936523, "rewards/margins": 13.709065437316895, "rewards/real": -3.9121880531311035, "step": 2510 }, { "epoch": 1.62, "learning_rate": 2.5565341585336827e-07, "logits/generated": -1.8233203887939453, "logits/real": -1.8996546268463135, "logps/generated": -575.7025756835938, "logps/real": -398.1545715332031, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -18.55044174194336, "rewards/margins": 15.433720588684082, "rewards/real": -3.1167218685150146, "step": 2520 }, { "epoch": 1.63, "learning_rate": 2.544632230421328e-07, "logits/generated": -1.8319776058197021, "logits/real": -1.9016917943954468, "logps/generated": -609.1539306640625, "logps/real": -345.43536376953125, "loss": 0.0132, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.780593872070312, "rewards/margins": 17.442068099975586, "rewards/real": -2.338524341583252, "step": 2530 }, { "epoch": 1.63, "learning_rate": 2.532730302308974e-07, "logits/generated": -1.9279365539550781, "logits/real": -1.9061311483383179, "logps/generated": -566.6038818359375, "logps/real": -330.6295166015625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -19.055160522460938, "rewards/margins": 15.601313591003418, "rewards/real": -3.4538471698760986, "step": 2540 }, { "epoch": 1.64, "learning_rate": 2.52082837419662e-07, "logits/generated": -1.8530943393707275, "logits/real": -1.919136643409729, "logps/generated": -604.7074584960938, "logps/real": -362.3363342285156, "loss": 0.0112, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.198497772216797, "rewards/margins": 16.335779190063477, "rewards/real": -2.8627171516418457, "step": 2550 }, { "epoch": 1.65, "learning_rate": 2.5089264460842657e-07, "logits/generated": -1.953850507736206, "logits/real": -1.872309923171997, "logps/generated": -618.9617309570312, "logps/real": -378.0514831542969, "loss": 0.0201, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.868818283081055, "rewards/margins": 15.176447868347168, "rewards/real": -3.692370653152466, "step": 2560 }, { "epoch": 1.65, "learning_rate": 2.497024517971911e-07, "logits/generated": -1.8568464517593384, "logits/real": -1.887955665588379, "logps/generated": -605.1824340820312, "logps/real": -405.60235595703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -18.905994415283203, "rewards/margins": 15.54051685333252, "rewards/real": -3.3654770851135254, "step": 2570 }, { "epoch": 1.66, "learning_rate": 2.485122589859557e-07, "logits/generated": -1.8930320739746094, "logits/real": -1.8917458057403564, "logps/generated": -584.1777954101562, "logps/real": -337.369384765625, "loss": 0.0201, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.138484954833984, "rewards/margins": 17.352680206298828, "rewards/real": -3.785806179046631, "step": 2580 }, { "epoch": 1.66, "learning_rate": 2.473220661747203e-07, "logits/generated": -1.8552837371826172, "logits/real": -1.9193109273910522, "logps/generated": -608.6238403320312, "logps/real": -370.24603271484375, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -19.182647705078125, "rewards/margins": 16.14703369140625, "rewards/real": -3.035613536834717, "step": 2590 }, { "epoch": 1.67, "learning_rate": 2.461318733634849e-07, "logits/generated": -1.7766849994659424, "logits/real": -1.8722776174545288, "logps/generated": -549.0695190429688, "logps/real": -349.0986022949219, "loss": 0.013, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.278690338134766, "rewards/margins": 16.906200408935547, "rewards/real": -2.3724896907806396, "step": 2600 }, { "epoch": 1.68, "learning_rate": 2.4494168055224947e-07, "logits/generated": -1.9358885288238525, "logits/real": -1.9020036458969116, "logps/generated": -619.1986694335938, "logps/real": -427.21258544921875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/generated": -18.58693504333496, "rewards/margins": 15.541351318359375, "rewards/real": -3.0455851554870605, "step": 2610 }, { "epoch": 1.68, "learning_rate": 2.4375148774101406e-07, "logits/generated": -1.799574613571167, "logits/real": -1.8969202041625977, "logps/generated": -522.123046875, "logps/real": -333.136474609375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/generated": -18.633255004882812, "rewards/margins": 15.599235534667969, "rewards/real": -3.0340187549591064, "step": 2620 }, { "epoch": 1.69, "learning_rate": 2.425612949297786e-07, "logits/generated": -1.8291136026382446, "logits/real": -1.8533122539520264, "logps/generated": -593.2408447265625, "logps/real": -402.7211608886719, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -19.01093101501465, "rewards/margins": 15.119882583618164, "rewards/real": -3.8910484313964844, "step": 2630 }, { "epoch": 1.7, "learning_rate": 2.413711021185432e-07, "logits/generated": -1.8732038736343384, "logits/real": -1.8730173110961914, "logps/generated": -600.4080810546875, "logps/real": -323.9451904296875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/generated": -20.80618667602539, "rewards/margins": 17.078380584716797, "rewards/real": -3.7278037071228027, "step": 2640 }, { "epoch": 1.7, "learning_rate": 2.401809093073078e-07, "logits/generated": -1.8166701793670654, "logits/real": -1.8924024105072021, "logps/generated": -576.9189453125, "logps/real": -372.4144592285156, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -19.3643798828125, "rewards/margins": 15.9496488571167, "rewards/real": -3.4147305488586426, "step": 2650 }, { "epoch": 1.71, "learning_rate": 2.3899071649607236e-07, "logits/generated": -1.8222957849502563, "logits/real": -1.7936245203018188, "logps/generated": -639.0947265625, "logps/real": -437.8866271972656, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/generated": -18.86641502380371, "rewards/margins": 15.062356948852539, "rewards/real": -3.8040592670440674, "step": 2660 }, { "epoch": 1.72, "learning_rate": 2.3780052368483693e-07, "logits/generated": -1.6525121927261353, "logits/real": -1.7323639392852783, "logps/generated": -528.6134033203125, "logps/real": -337.75048828125, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.23191261291504, "rewards/margins": 15.418153762817383, "rewards/real": -4.813759803771973, "step": 2670 }, { "epoch": 1.72, "learning_rate": 2.3661033087360152e-07, "logits/generated": -1.8742326498031616, "logits/real": -1.941476583480835, "logps/generated": -597.1447143554688, "logps/real": -358.0196838378906, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/generated": -18.783157348632812, "rewards/margins": 15.727224349975586, "rewards/real": -3.055934190750122, "step": 2680 }, { "epoch": 1.73, "learning_rate": 2.354201380623661e-07, "logits/generated": -1.8624379634857178, "logits/real": -1.893402099609375, "logps/generated": -629.6782836914062, "logps/real": -406.6865234375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -20.850439071655273, "rewards/margins": 16.49611473083496, "rewards/real": -4.35432243347168, "step": 2690 }, { "epoch": 1.74, "learning_rate": 2.342299452511307e-07, "logits/generated": -1.8548250198364258, "logits/real": -1.8583017587661743, "logps/generated": -582.7992553710938, "logps/real": -323.04205322265625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -22.348865509033203, "rewards/margins": 18.243560791015625, "rewards/real": -4.105301856994629, "step": 2700 }, { "epoch": 1.74, "learning_rate": 2.3303975243989526e-07, "logits/generated": -1.9024966955184937, "logits/real": -2.017508029937744, "logps/generated": -574.1644287109375, "logps/real": -349.75714111328125, "loss": 0.03, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.862314224243164, "rewards/margins": 17.064558029174805, "rewards/real": -3.7977538108825684, "step": 2710 }, { "epoch": 1.75, "learning_rate": 2.3184955962865982e-07, "logits/generated": -1.9994853734970093, "logits/real": -2.0445475578308105, "logps/generated": -562.1621704101562, "logps/real": -349.68695068359375, "loss": 0.0202, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.807878494262695, "rewards/margins": 17.700210571289062, "rewards/real": -4.107669353485107, "step": 2720 }, { "epoch": 1.75, "learning_rate": 2.306593668174244e-07, "logits/generated": -1.9293378591537476, "logits/real": -2.0241332054138184, "logps/generated": -584.3340454101562, "logps/real": -374.9560546875, "loss": 0.0256, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.921669006347656, "rewards/margins": 16.468935012817383, "rewards/real": -3.452733278274536, "step": 2730 }, { "epoch": 1.76, "learning_rate": 2.2946917400618898e-07, "logits/generated": -1.894095778465271, "logits/real": -1.973131775856018, "logps/generated": -621.0687866210938, "logps/real": -442.6654357910156, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -21.7646427154541, "rewards/margins": 17.236825942993164, "rewards/real": -4.527815818786621, "step": 2740 }, { "epoch": 1.77, "learning_rate": 2.2827898119495357e-07, "logits/generated": -1.908696174621582, "logits/real": -1.8996613025665283, "logps/generated": -641.0946044921875, "logps/real": -396.1679992675781, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -22.93410873413086, "rewards/margins": 18.131559371948242, "rewards/real": -4.802548885345459, "step": 2750 }, { "epoch": 1.77, "learning_rate": 2.2708878838371816e-07, "logits/generated": -1.887770414352417, "logits/real": -1.9259475469589233, "logps/generated": -641.0634765625, "logps/real": -353.3055419921875, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.32883071899414, "rewards/margins": 18.148107528686523, "rewards/real": -4.18071985244751, "step": 2760 }, { "epoch": 1.78, "learning_rate": 2.2589859557248272e-07, "logits/generated": -1.931406021118164, "logits/real": -1.9512712955474854, "logps/generated": -613.9124755859375, "logps/real": -410.6527404785156, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -21.747358322143555, "rewards/margins": 16.431184768676758, "rewards/real": -5.31617546081543, "step": 2770 }, { "epoch": 1.79, "learning_rate": 2.247084027612473e-07, "logits/generated": -1.9454374313354492, "logits/real": -1.9846471548080444, "logps/generated": -613.2532348632812, "logps/real": -419.8189392089844, "loss": 0.0369, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.502111434936523, "rewards/margins": 19.032825469970703, "rewards/real": -3.4692866802215576, "step": 2780 }, { "epoch": 1.79, "learning_rate": 2.235182099500119e-07, "logits/generated": -1.943305253982544, "logits/real": -1.9613683223724365, "logps/generated": -590.8385620117188, "logps/real": -376.8114013671875, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/generated": -18.899354934692383, "rewards/margins": 15.790725708007812, "rewards/real": -3.108628273010254, "step": 2790 }, { "epoch": 1.8, "learning_rate": 2.2232801713877646e-07, "logits/generated": -1.969151258468628, "logits/real": -2.0842716693878174, "logps/generated": -567.1388549804688, "logps/real": -386.68927001953125, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/generated": -19.092025756835938, "rewards/margins": 14.683004379272461, "rewards/real": -4.409019947052002, "step": 2800 }, { "epoch": 1.81, "learning_rate": 2.2113782432754105e-07, "logits/generated": -2.0907585620880127, "logits/real": -2.102865219116211, "logps/generated": -599.5132446289062, "logps/real": -384.66351318359375, "loss": 0.0765, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.033435821533203, "rewards/margins": 15.8507080078125, "rewards/real": -4.182726860046387, "step": 2810 }, { "epoch": 1.81, "learning_rate": 2.1994763151630564e-07, "logits/generated": -2.1723508834838867, "logits/real": -2.1705594062805176, "logps/generated": -512.1658935546875, "logps/real": -349.0197448730469, "loss": 0.014, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.58482551574707, "rewards/margins": 14.868623733520508, "rewards/real": -1.716202735900879, "step": 2820 }, { "epoch": 1.82, "learning_rate": 2.187574387050702e-07, "logits/generated": -2.1666464805603027, "logits/real": -2.1792376041412354, "logps/generated": -632.5197143554688, "logps/real": -366.4725646972656, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/generated": -18.97044563293457, "rewards/margins": 16.773412704467773, "rewards/real": -2.1970319747924805, "step": 2830 }, { "epoch": 1.83, "learning_rate": 2.175672458938348e-07, "logits/generated": -2.1748318672180176, "logits/real": -2.1950387954711914, "logps/generated": -549.257568359375, "logps/real": -374.5805969238281, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.554750442504883, "rewards/margins": 15.931724548339844, "rewards/real": -2.6230263710021973, "step": 2840 }, { "epoch": 1.83, "learning_rate": 2.1637705308259938e-07, "logits/generated": -2.2087433338165283, "logits/real": -2.2343385219573975, "logps/generated": -569.7178344726562, "logps/real": -363.35565185546875, "loss": 0.02, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.506458282470703, "rewards/margins": 15.537744522094727, "rewards/real": -2.968712568283081, "step": 2850 }, { "epoch": 1.84, "learning_rate": 2.1518686027136395e-07, "logits/generated": -2.141085624694824, "logits/real": -2.2068240642547607, "logps/generated": -559.6201171875, "logps/real": -355.288818359375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/generated": -17.508007049560547, "rewards/margins": 13.753092765808105, "rewards/real": -3.754911422729492, "step": 2860 }, { "epoch": 1.84, "learning_rate": 2.1399666746012854e-07, "logits/generated": -2.1633191108703613, "logits/real": -2.1534600257873535, "logps/generated": -557.6275024414062, "logps/real": -355.95733642578125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/generated": -18.064023971557617, "rewards/margins": 13.931376457214355, "rewards/real": -4.132648468017578, "step": 2870 }, { "epoch": 1.85, "learning_rate": 2.1280647464889313e-07, "logits/generated": -2.146686315536499, "logits/real": -2.154139280319214, "logps/generated": -617.3681030273438, "logps/real": -377.43853759765625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -20.22401237487793, "rewards/margins": 16.329097747802734, "rewards/real": -3.8949122428894043, "step": 2880 }, { "epoch": 1.86, "learning_rate": 2.116162818376577e-07, "logits/generated": -2.074739456176758, "logits/real": -2.1095595359802246, "logps/generated": -574.0628051757812, "logps/real": -360.5137023925781, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -20.636310577392578, "rewards/margins": 17.622827529907227, "rewards/real": -3.013484477996826, "step": 2890 }, { "epoch": 1.86, "learning_rate": 2.1042608902642228e-07, "logits/generated": -2.1206960678100586, "logits/real": -2.20168399810791, "logps/generated": -583.8810424804688, "logps/real": -382.8518371582031, "loss": 0.0146, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.312095642089844, "rewards/margins": 15.806414604187012, "rewards/real": -4.505680561065674, "step": 2900 }, { "epoch": 1.87, "learning_rate": 2.0923589621518687e-07, "logits/generated": -2.0229172706604004, "logits/real": -2.0557923316955566, "logps/generated": -597.6458740234375, "logps/real": -395.81646728515625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -20.894927978515625, "rewards/margins": 17.441539764404297, "rewards/real": -3.453387498855591, "step": 2910 }, { "epoch": 1.88, "learning_rate": 2.080457034039514e-07, "logits/generated": -1.9580085277557373, "logits/real": -2.1535487174987793, "logps/generated": -578.9662475585938, "logps/real": -369.46197509765625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -18.469118118286133, "rewards/margins": 14.781881332397461, "rewards/real": -3.687236785888672, "step": 2920 }, { "epoch": 1.88, "learning_rate": 2.06855510592716e-07, "logits/generated": -2.0475711822509766, "logits/real": -2.0905933380126953, "logps/generated": -579.7207641601562, "logps/real": -362.5934143066406, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/generated": -19.495128631591797, "rewards/margins": 16.245441436767578, "rewards/real": -3.249690294265747, "step": 2930 }, { "epoch": 1.89, "learning_rate": 2.0566531778148059e-07, "logits/generated": -1.9959481954574585, "logits/real": -2.1353626251220703, "logps/generated": -564.4955444335938, "logps/real": -383.1506042480469, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/generated": -18.533605575561523, "rewards/margins": 16.09157943725586, "rewards/real": -2.4420278072357178, "step": 2940 }, { "epoch": 1.9, "learning_rate": 2.0447512497024515e-07, "logits/generated": -2.084301233291626, "logits/real": -2.0366158485412598, "logps/generated": -518.3314208984375, "logps/real": -350.7093505859375, "loss": 0.0225, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.012195587158203, "rewards/margins": 15.021069526672363, "rewards/real": -2.9911255836486816, "step": 2950 }, { "epoch": 1.9, "learning_rate": 2.0328493215900974e-07, "logits/generated": -2.1363632678985596, "logits/real": -2.122278928756714, "logps/generated": -602.5611572265625, "logps/real": -397.9182434082031, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -20.253149032592773, "rewards/margins": 17.09138298034668, "rewards/real": -3.1617660522460938, "step": 2960 }, { "epoch": 1.91, "learning_rate": 2.0209473934777433e-07, "logits/generated": -2.134699821472168, "logits/real": -2.08998441696167, "logps/generated": -580.0136108398438, "logps/real": -359.6705322265625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -19.094505310058594, "rewards/margins": 15.466270446777344, "rewards/real": -3.628235340118408, "step": 2970 }, { "epoch": 1.92, "learning_rate": 2.0090454653653892e-07, "logits/generated": -2.042788028717041, "logits/real": -2.0056633949279785, "logps/generated": -532.9696044921875, "logps/real": -333.90618896484375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -18.65140151977539, "rewards/margins": 15.834383010864258, "rewards/real": -2.8170199394226074, "step": 2980 }, { "epoch": 1.92, "learning_rate": 1.9971435372530348e-07, "logits/generated": -2.0326991081237793, "logits/real": -2.0445380210876465, "logps/generated": -636.9520263671875, "logps/real": -420.206787109375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/generated": -20.007139205932617, "rewards/margins": 16.871540069580078, "rewards/real": -3.1356008052825928, "step": 2990 }, { "epoch": 1.93, "learning_rate": 1.9852416091406807e-07, "logits/generated": -2.16467022895813, "logits/real": -2.151834011077881, "logps/generated": -608.9406127929688, "logps/real": -386.32403564453125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/generated": -19.408123016357422, "rewards/margins": 15.358880996704102, "rewards/real": -4.049244403839111, "step": 3000 }, { "epoch": 1.93, "learning_rate": 1.9733396810283266e-07, "logits/generated": -1.994359016418457, "logits/real": -2.083233594894409, "logps/generated": -580.0289916992188, "logps/real": -387.74285888671875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -19.273834228515625, "rewards/margins": 16.140888214111328, "rewards/real": -3.1329457759857178, "step": 3010 }, { "epoch": 1.94, "learning_rate": 1.9614377529159722e-07, "logits/generated": -1.9821665287017822, "logits/real": -1.9590924978256226, "logps/generated": -658.1160278320312, "logps/real": -387.3288879394531, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.883031845092773, "rewards/margins": 18.182018280029297, "rewards/real": -3.7010135650634766, "step": 3020 }, { "epoch": 1.95, "learning_rate": 1.9495358248036181e-07, "logits/generated": -1.8935340642929077, "logits/real": -1.9233297109603882, "logps/generated": -587.504638671875, "logps/real": -379.6001281738281, "loss": 0.0289, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.569808959960938, "rewards/margins": 16.9621639251709, "rewards/real": -4.607644081115723, "step": 3030 }, { "epoch": 1.95, "learning_rate": 1.937633896691264e-07, "logits/generated": -2.0384740829467773, "logits/real": -2.0107827186584473, "logps/generated": -560.0950927734375, "logps/real": -348.64178466796875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/generated": -20.69436264038086, "rewards/margins": 16.582056045532227, "rewards/real": -4.112307548522949, "step": 3040 }, { "epoch": 1.96, "learning_rate": 1.9257319685789097e-07, "logits/generated": -1.8694576025009155, "logits/real": -1.8919403553009033, "logps/generated": -610.2615966796875, "logps/real": -373.0062561035156, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/generated": -21.5854434967041, "rewards/margins": 17.94769287109375, "rewards/real": -3.637749433517456, "step": 3050 }, { "epoch": 1.97, "learning_rate": 1.9138300404665556e-07, "logits/generated": -1.833876371383667, "logits/real": -1.923029899597168, "logps/generated": -625.2069091796875, "logps/real": -445.4774475097656, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/generated": -21.171947479248047, "rewards/margins": 16.788434982299805, "rewards/real": -4.383509635925293, "step": 3060 }, { "epoch": 1.97, "learning_rate": 1.9019281123542015e-07, "logits/generated": -1.8446261882781982, "logits/real": -1.9809293746948242, "logps/generated": -639.4974365234375, "logps/real": -433.82843017578125, "loss": 0.01, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.332645416259766, "rewards/margins": 17.92720603942871, "rewards/real": -4.405437469482422, "step": 3070 }, { "epoch": 1.98, "learning_rate": 1.890026184241847e-07, "logits/generated": -1.794547438621521, "logits/real": -1.902682900428772, "logps/generated": -686.3941650390625, "logps/real": -412.7117614746094, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/generated": -23.83676528930664, "rewards/margins": 18.992544174194336, "rewards/real": -4.844220161437988, "step": 3080 }, { "epoch": 1.99, "learning_rate": 1.878124256129493e-07, "logits/generated": -1.771178960800171, "logits/real": -1.8597948551177979, "logps/generated": -650.177490234375, "logps/real": -385.32281494140625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/generated": -22.727323532104492, "rewards/margins": 18.738880157470703, "rewards/real": -3.9884445667266846, "step": 3090 }, { "epoch": 1.99, "learning_rate": 1.866222328017139e-07, "logits/generated": -1.7059656381607056, "logits/real": -1.7673842906951904, "logps/generated": -622.1861572265625, "logps/real": -364.33526611328125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -22.290287017822266, "rewards/margins": 17.137935638427734, "rewards/real": -5.152352809906006, "step": 3100 }, { "epoch": 2.0, "learning_rate": 1.8543203999047845e-07, "logits/generated": -1.7726774215698242, "logits/real": -1.9006750583648682, "logps/generated": -613.3416748046875, "logps/real": -358.4165954589844, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.165300369262695, "rewards/margins": 17.081605911254883, "rewards/real": -5.08369255065918, "step": 3110 }, { "epoch": 2.01, "learning_rate": 1.8424184717924304e-07, "logits/generated": -1.7835719585418701, "logits/real": -1.8426719903945923, "logps/generated": -601.4617919921875, "logps/real": -340.8309631347656, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.10822868347168, "rewards/margins": 17.62343978881836, "rewards/real": -4.48478889465332, "step": 3120 }, { "epoch": 2.01, "learning_rate": 1.830516543680076e-07, "logits/generated": -1.864492416381836, "logits/real": -1.931165099143982, "logps/generated": -646.0354614257812, "logps/real": -399.3186340332031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -22.49277114868164, "rewards/margins": 17.703113555908203, "rewards/real": -4.789654731750488, "step": 3130 }, { "epoch": 2.02, "learning_rate": 1.8186146155677217e-07, "logits/generated": -1.6619055271148682, "logits/real": -1.682189702987671, "logps/generated": -651.1900634765625, "logps/real": -430.13775634765625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -23.825260162353516, "rewards/margins": 19.324203491210938, "rewards/real": -4.501051902770996, "step": 3140 }, { "epoch": 2.02, "learning_rate": 1.8067126874553676e-07, "logits/generated": -1.8066043853759766, "logits/real": -1.857184648513794, "logps/generated": -644.6920166015625, "logps/real": -422.68731689453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -23.160465240478516, "rewards/margins": 18.2052001953125, "rewards/real": -4.955265045166016, "step": 3150 }, { "epoch": 2.03, "learning_rate": 1.7948107593430135e-07, "logits/generated": -1.7574580907821655, "logits/real": -1.6988914012908936, "logps/generated": -635.5706176757812, "logps/real": -414.3448791503906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -22.139110565185547, "rewards/margins": 17.725711822509766, "rewards/real": -4.41339635848999, "step": 3160 }, { "epoch": 2.04, "learning_rate": 1.782908831230659e-07, "logits/generated": -1.7514142990112305, "logits/real": -1.833495855331421, "logps/generated": -635.5641479492188, "logps/real": -391.35406494140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -24.829418182373047, "rewards/margins": 19.86545753479004, "rewards/real": -4.963961601257324, "step": 3170 }, { "epoch": 2.04, "learning_rate": 1.771006903118305e-07, "logits/generated": -1.8058671951293945, "logits/real": -1.84176504611969, "logps/generated": -596.8150634765625, "logps/real": -392.7447814941406, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -23.030054092407227, "rewards/margins": 17.652753829956055, "rewards/real": -5.37730073928833, "step": 3180 }, { "epoch": 2.05, "learning_rate": 1.759104975005951e-07, "logits/generated": -1.7943785190582275, "logits/real": -1.8919038772583008, "logps/generated": -616.5109252929688, "logps/real": -351.06903076171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -24.05794334411621, "rewards/margins": 18.479902267456055, "rewards/real": -5.578042984008789, "step": 3190 }, { "epoch": 2.06, "learning_rate": 1.7472030468935965e-07, "logits/generated": -1.7733243703842163, "logits/real": -1.8298814296722412, "logps/generated": -644.5895385742188, "logps/real": -346.65716552734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -23.519426345825195, "rewards/margins": 19.296615600585938, "rewards/real": -4.222809791564941, "step": 3200 }, { "epoch": 2.06, "learning_rate": 1.7353011187812424e-07, "logits/generated": -1.7913854122161865, "logits/real": -1.7636489868164062, "logps/generated": -630.2733154296875, "logps/real": -346.322021484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -24.583568572998047, "rewards/margins": 19.213102340698242, "rewards/real": -5.3704681396484375, "step": 3210 }, { "epoch": 2.07, "learning_rate": 1.7233991906688883e-07, "logits/generated": -1.693394422531128, "logits/real": -1.777193307876587, "logps/generated": -647.3377685546875, "logps/real": -437.0379333496094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -24.571914672851562, "rewards/margins": 18.28937530517578, "rewards/real": -6.282541751861572, "step": 3220 }, { "epoch": 2.08, "learning_rate": 1.7114972625565342e-07, "logits/generated": -1.7938833236694336, "logits/real": -1.8682388067245483, "logps/generated": -612.8692016601562, "logps/real": -373.411865234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -23.837581634521484, "rewards/margins": 18.990482330322266, "rewards/real": -4.847100734710693, "step": 3230 }, { "epoch": 2.08, "learning_rate": 1.6995953344441799e-07, "logits/generated": -1.5872770547866821, "logits/real": -1.749686598777771, "logps/generated": -612.15576171875, "logps/real": -354.6670837402344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -24.107717514038086, "rewards/margins": 19.196178436279297, "rewards/real": -4.911539554595947, "step": 3240 }, { "epoch": 2.09, "learning_rate": 1.6876934063318258e-07, "logits/generated": -1.744523048400879, "logits/real": -1.7055385112762451, "logps/generated": -612.4163818359375, "logps/real": -340.0469055175781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -26.122411727905273, "rewards/margins": 20.10819435119629, "rewards/real": -6.014217376708984, "step": 3250 }, { "epoch": 2.1, "learning_rate": 1.6757914782194717e-07, "logits/generated": -1.761125922203064, "logits/real": -1.713313102722168, "logps/generated": -677.6751708984375, "logps/real": -460.227294921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -24.481826782226562, "rewards/margins": 20.24111557006836, "rewards/real": -4.240714073181152, "step": 3260 }, { "epoch": 2.1, "learning_rate": 1.6638895501071173e-07, "logits/generated": -1.7685257196426392, "logits/real": -1.7595218420028687, "logps/generated": -670.4361572265625, "logps/real": -387.3614807128906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -25.423667907714844, "rewards/margins": 20.145977020263672, "rewards/real": -5.277690887451172, "step": 3270 }, { "epoch": 2.11, "learning_rate": 1.6519876219947632e-07, "logits/generated": -1.7515497207641602, "logits/real": -1.8205455541610718, "logps/generated": -609.548828125, "logps/real": -398.30230712890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -24.230987548828125, "rewards/margins": 19.289710998535156, "rewards/real": -4.9412760734558105, "step": 3280 }, { "epoch": 2.11, "learning_rate": 1.640085693882409e-07, "logits/generated": -1.7568248510360718, "logits/real": -1.7875878810882568, "logps/generated": -670.6278076171875, "logps/real": -403.04669189453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -25.942584991455078, "rewards/margins": 19.860820770263672, "rewards/real": -6.081762790679932, "step": 3290 }, { "epoch": 2.12, "learning_rate": 1.6281837657700547e-07, "logits/generated": -1.672739028930664, "logits/real": -1.710694670677185, "logps/generated": -709.2623291015625, "logps/real": -427.17083740234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -27.278446197509766, "rewards/margins": 20.85129737854004, "rewards/real": -6.42714786529541, "step": 3300 }, { "epoch": 2.13, "learning_rate": 1.6162818376577006e-07, "logits/generated": -1.7365968227386475, "logits/real": -1.8601267337799072, "logps/generated": -647.3109130859375, "logps/real": -408.20904541015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -24.648662567138672, "rewards/margins": 18.514421463012695, "rewards/real": -6.134242057800293, "step": 3310 }, { "epoch": 2.13, "learning_rate": 1.6043799095453465e-07, "logits/generated": -1.7646595239639282, "logits/real": -1.7028201818466187, "logps/generated": -615.2380981445312, "logps/real": -392.93438720703125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -23.260175704956055, "rewards/margins": 17.600215911865234, "rewards/real": -5.659959316253662, "step": 3320 }, { "epoch": 2.14, "learning_rate": 1.592477981432992e-07, "logits/generated": -1.677546501159668, "logits/real": -1.7684322595596313, "logps/generated": -675.1948852539062, "logps/real": -372.6117248535156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -28.304229736328125, "rewards/margins": 22.81051254272461, "rewards/real": -5.493716239929199, "step": 3330 }, { "epoch": 2.15, "learning_rate": 1.5805760533206378e-07, "logits/generated": -1.6419671773910522, "logits/real": -1.754093885421753, "logps/generated": -710.6311645507812, "logps/real": -412.677001953125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -26.5703125, "rewards/margins": 20.43376922607422, "rewards/real": -6.136545658111572, "step": 3340 }, { "epoch": 2.15, "learning_rate": 1.5686741252082837e-07, "logits/generated": -1.6100307703018188, "logits/real": -1.7829450368881226, "logps/generated": -626.542236328125, "logps/real": -390.32904052734375, "loss": 0.0039, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.756441116333008, "rewards/margins": 18.904939651489258, "rewards/real": -3.85149884223938, "step": 3350 }, { "epoch": 2.16, "learning_rate": 1.5567721970959293e-07, "logits/generated": -1.7627454996109009, "logits/real": -1.9145416021347046, "logps/generated": -677.59765625, "logps/real": -411.33648681640625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -24.147655487060547, "rewards/margins": 19.63693618774414, "rewards/real": -4.51071834564209, "step": 3360 }, { "epoch": 2.17, "learning_rate": 1.5448702689835752e-07, "logits/generated": -1.6949329376220703, "logits/real": -1.7547237873077393, "logps/generated": -625.1910400390625, "logps/real": -367.7977600097656, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -23.851118087768555, "rewards/margins": 19.71878433227539, "rewards/real": -4.132336616516113, "step": 3370 }, { "epoch": 2.17, "learning_rate": 1.532968340871221e-07, "logits/generated": -1.744741439819336, "logits/real": -1.664894700050354, "logps/generated": -640.474853515625, "logps/real": -399.55108642578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -23.19083023071289, "rewards/margins": 18.417896270751953, "rewards/real": -4.772933006286621, "step": 3380 }, { "epoch": 2.18, "learning_rate": 1.5210664127588667e-07, "logits/generated": -1.662096381187439, "logits/real": -1.8298946619033813, "logps/generated": -631.4981689453125, "logps/real": -397.19586181640625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -23.526195526123047, "rewards/margins": 19.6595516204834, "rewards/real": -3.866642475128174, "step": 3390 }, { "epoch": 2.19, "learning_rate": 1.5091644846465126e-07, "logits/generated": -1.7289931774139404, "logits/real": -1.8209636211395264, "logps/generated": -650.0264892578125, "logps/real": -391.0191345214844, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -22.553695678710938, "rewards/margins": 18.257787704467773, "rewards/real": -4.295907974243164, "step": 3400 }, { "epoch": 2.19, "learning_rate": 1.4972625565341585e-07, "logits/generated": -1.563467025756836, "logits/real": -1.7469037771224976, "logps/generated": -654.3523559570312, "logps/real": -378.0865783691406, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -25.712825775146484, "rewards/margins": 20.601755142211914, "rewards/real": -5.1110734939575195, "step": 3410 }, { "epoch": 2.2, "learning_rate": 1.4853606284218042e-07, "logits/generated": -1.7174959182739258, "logits/real": -1.8672186136245728, "logps/generated": -646.060791015625, "logps/real": -402.63372802734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.062244415283203, "rewards/margins": 20.159557342529297, "rewards/real": -5.902686595916748, "step": 3420 }, { "epoch": 2.2, "learning_rate": 1.47345870030945e-07, "logits/generated": -1.7195484638214111, "logits/real": -1.7659461498260498, "logps/generated": -670.35693359375, "logps/real": -430.11724853515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -25.210329055786133, "rewards/margins": 19.280933380126953, "rewards/real": -5.92939567565918, "step": 3430 }, { "epoch": 2.21, "learning_rate": 1.461556772197096e-07, "logits/generated": -1.6868633031845093, "logits/real": -1.7503039836883545, "logps/generated": -666.7827758789062, "logps/real": -404.4327087402344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.964031219482422, "rewards/margins": 18.93131446838379, "rewards/real": -6.03271484375, "step": 3440 }, { "epoch": 2.22, "learning_rate": 1.4496548440847416e-07, "logits/generated": -1.5785818099975586, "logits/real": -1.695892572402954, "logps/generated": -671.3966674804688, "logps/real": -425.050537109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -28.694400787353516, "rewards/margins": 22.770051956176758, "rewards/real": -5.924350261688232, "step": 3450 }, { "epoch": 2.22, "learning_rate": 1.4377529159723875e-07, "logits/generated": -1.7265466451644897, "logits/real": -1.7282949686050415, "logps/generated": -644.9312744140625, "logps/real": -385.41571044921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.46233558654785, "rewards/margins": 20.13895034790039, "rewards/real": -5.323385238647461, "step": 3460 }, { "epoch": 2.23, "learning_rate": 1.4258509878600334e-07, "logits/generated": -1.606693983078003, "logits/real": -1.7219680547714233, "logps/generated": -651.696533203125, "logps/real": -405.09185791015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -25.30702781677246, "rewards/margins": 19.77613067626953, "rewards/real": -5.53089714050293, "step": 3470 }, { "epoch": 2.24, "learning_rate": 1.4139490597476793e-07, "logits/generated": -1.691404104232788, "logits/real": -1.7261533737182617, "logps/generated": -705.8663330078125, "logps/real": -424.4842224121094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -28.195613861083984, "rewards/margins": 22.152070999145508, "rewards/real": -6.043543815612793, "step": 3480 }, { "epoch": 2.24, "learning_rate": 1.402047131635325e-07, "logits/generated": -1.5611227750778198, "logits/real": -1.7058401107788086, "logps/generated": -670.8802490234375, "logps/real": -413.3358459472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.489639282226562, "rewards/margins": 20.785070419311523, "rewards/real": -5.704569339752197, "step": 3490 }, { "epoch": 2.25, "learning_rate": 1.3901452035229708e-07, "logits/generated": -1.512269377708435, "logits/real": -1.68305242061615, "logps/generated": -571.3214111328125, "logps/real": -344.5645751953125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -24.5626277923584, "rewards/margins": 18.92730140686035, "rewards/real": -5.6353278160095215, "step": 3500 }, { "epoch": 2.26, "learning_rate": 1.3782432754106167e-07, "logits/generated": -1.530753493309021, "logits/real": -1.6607004404067993, "logps/generated": -622.2992553710938, "logps/real": -382.42303466796875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -25.785675048828125, "rewards/margins": 19.25429344177246, "rewards/real": -6.531381130218506, "step": 3510 }, { "epoch": 2.26, "learning_rate": 1.3663413472982623e-07, "logits/generated": -1.732834815979004, "logits/real": -1.736498236656189, "logps/generated": -631.5408935546875, "logps/real": -347.4884033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.657485961914062, "rewards/margins": 20.199687957763672, "rewards/real": -6.457800388336182, "step": 3520 }, { "epoch": 2.27, "learning_rate": 1.354439419185908e-07, "logits/generated": -1.5467314720153809, "logits/real": -1.6046216487884521, "logps/generated": -634.9423828125, "logps/real": -400.60052490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.31796646118164, "rewards/margins": 20.177886962890625, "rewards/real": -7.140076637268066, "step": 3530 }, { "epoch": 2.28, "learning_rate": 1.3425374910735539e-07, "logits/generated": -1.599615216255188, "logits/real": -1.7260444164276123, "logps/generated": -686.1055908203125, "logps/real": -475.2167053222656, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -26.89980697631836, "rewards/margins": 20.732288360595703, "rewards/real": -6.167518615722656, "step": 3540 }, { "epoch": 2.28, "learning_rate": 1.3306355629611995e-07, "logits/generated": -1.519061803817749, "logits/real": -1.6003974676132202, "logps/generated": -736.18408203125, "logps/real": -413.788330078125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -29.17459487915039, "rewards/margins": 22.599679946899414, "rewards/real": -6.574913024902344, "step": 3550 }, { "epoch": 2.29, "learning_rate": 1.3187336348488454e-07, "logits/generated": -1.5416706800460815, "logits/real": -1.5915443897247314, "logps/generated": -693.075927734375, "logps/real": -461.91339111328125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -26.290576934814453, "rewards/margins": 20.055639266967773, "rewards/real": -6.234936237335205, "step": 3560 }, { "epoch": 2.29, "learning_rate": 1.3068317067364913e-07, "logits/generated": -1.521410584449768, "logits/real": -1.6385908126831055, "logps/generated": -643.1195068359375, "logps/real": -369.3460998535156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -26.421768188476562, "rewards/margins": 20.350460052490234, "rewards/real": -6.071305274963379, "step": 3570 }, { "epoch": 2.3, "learning_rate": 1.294929778624137e-07, "logits/generated": -1.5358374118804932, "logits/real": -1.6597099304199219, "logps/generated": -689.6113891601562, "logps/real": -466.4942321777344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -24.892501831054688, "rewards/margins": 19.115379333496094, "rewards/real": -5.777121543884277, "step": 3580 }, { "epoch": 2.31, "learning_rate": 1.2830278505117828e-07, "logits/generated": -1.406715989112854, "logits/real": -1.4844402074813843, "logps/generated": -658.787353515625, "logps/real": -436.83636474609375, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.258020401000977, "rewards/margins": 21.475244522094727, "rewards/real": -6.782778739929199, "step": 3590 }, { "epoch": 2.31, "learning_rate": 1.2711259223994287e-07, "logits/generated": -1.3396766185760498, "logits/real": -1.4809544086456299, "logps/generated": -681.5261840820312, "logps/real": -404.4817199707031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -27.8902645111084, "rewards/margins": 21.514251708984375, "rewards/real": -6.376011848449707, "step": 3600 }, { "epoch": 2.32, "learning_rate": 1.2592239942870743e-07, "logits/generated": -1.3948055505752563, "logits/real": -1.5513432025909424, "logps/generated": -698.7779541015625, "logps/real": -433.32244873046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -27.659128189086914, "rewards/margins": 20.854068756103516, "rewards/real": -6.805060386657715, "step": 3610 }, { "epoch": 2.33, "learning_rate": 1.2473220661747202e-07, "logits/generated": -1.5047938823699951, "logits/real": -1.5021181106567383, "logps/generated": -733.7626342773438, "logps/real": -459.6304626464844, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -30.57221031188965, "rewards/margins": 21.513500213623047, "rewards/real": -9.058713912963867, "step": 3620 }, { "epoch": 2.33, "learning_rate": 1.2354201380623661e-07, "logits/generated": -1.440640926361084, "logits/real": -1.4846980571746826, "logps/generated": -692.8338623046875, "logps/real": -438.29180908203125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -28.915645599365234, "rewards/margins": 19.977619171142578, "rewards/real": -8.93802547454834, "step": 3630 }, { "epoch": 2.34, "learning_rate": 1.2235182099500118e-07, "logits/generated": -1.4438087940216064, "logits/real": -1.5675140619277954, "logps/generated": -683.9814453125, "logps/real": -481.3240661621094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -28.231365203857422, "rewards/margins": 20.71581268310547, "rewards/real": -7.515552520751953, "step": 3640 }, { "epoch": 2.35, "learning_rate": 1.2116162818376577e-07, "logits/generated": -1.4532592296600342, "logits/real": -1.466347336769104, "logps/generated": -653.7462158203125, "logps/real": -410.3941955566406, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -27.256484985351562, "rewards/margins": 19.569355010986328, "rewards/real": -7.687130928039551, "step": 3650 }, { "epoch": 2.35, "learning_rate": 1.1997143537253036e-07, "logits/generated": -1.519970178604126, "logits/real": -1.6325457096099854, "logps/generated": -666.7601318359375, "logps/real": -440.3809509277344, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.8579044342041, "rewards/margins": 21.10441780090332, "rewards/real": -7.753486633300781, "step": 3660 }, { "epoch": 2.36, "learning_rate": 1.1878124256129493e-07, "logits/generated": -1.404447317123413, "logits/real": -1.4850072860717773, "logps/generated": -646.6796875, "logps/real": -382.3814697265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -27.619770050048828, "rewards/margins": 19.931453704833984, "rewards/real": -7.688315391540527, "step": 3670 }, { "epoch": 2.37, "learning_rate": 1.175910497500595e-07, "logits/generated": -1.465595006942749, "logits/real": -1.5086153745651245, "logps/generated": -713.1724243164062, "logps/real": -460.9185485839844, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -30.117782592773438, "rewards/margins": 21.637718200683594, "rewards/real": -8.480066299438477, "step": 3680 }, { "epoch": 2.37, "learning_rate": 1.1640085693882409e-07, "logits/generated": -1.3850997686386108, "logits/real": -1.4538679122924805, "logps/generated": -767.3414306640625, "logps/real": -462.62030029296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -33.52153396606445, "rewards/margins": 23.91811180114746, "rewards/real": -9.603418350219727, "step": 3690 }, { "epoch": 2.38, "learning_rate": 1.1521066412758866e-07, "logits/generated": -1.4143835306167603, "logits/real": -1.5478150844573975, "logps/generated": -652.1123657226562, "logps/real": -392.4671630859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -30.432504653930664, "rewards/margins": 21.238384246826172, "rewards/real": -9.194117546081543, "step": 3700 }, { "epoch": 2.38, "learning_rate": 1.1402047131635324e-07, "logits/generated": -1.4064255952835083, "logits/real": -1.4616397619247437, "logps/generated": -666.3732299804688, "logps/real": -342.7879333496094, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -29.1993465423584, "rewards/margins": 22.13344955444336, "rewards/real": -7.0659003257751465, "step": 3710 }, { "epoch": 2.39, "learning_rate": 1.1283027850511783e-07, "logits/generated": -1.3749884366989136, "logits/real": -1.4327641725540161, "logps/generated": -699.9730224609375, "logps/real": -428.4892578125, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.05487060546875, "rewards/margins": 22.448055267333984, "rewards/real": -8.606815338134766, "step": 3720 }, { "epoch": 2.4, "learning_rate": 1.116400856938824e-07, "logits/generated": -1.3594131469726562, "logits/real": -1.515339732170105, "logps/generated": -683.3230590820312, "logps/real": -435.6929626464844, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -30.349924087524414, "rewards/margins": 22.398181915283203, "rewards/real": -7.951746940612793, "step": 3730 }, { "epoch": 2.4, "learning_rate": 1.1044989288264698e-07, "logits/generated": -1.3991297483444214, "logits/real": -1.5862958431243896, "logps/generated": -739.5543212890625, "logps/real": -410.45501708984375, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.497798919677734, "rewards/margins": 21.733530044555664, "rewards/real": -8.764264106750488, "step": 3740 }, { "epoch": 2.41, "learning_rate": 1.0925970007141157e-07, "logits/generated": -1.4209661483764648, "logits/real": -1.554810643196106, "logps/generated": -690.9508056640625, "logps/real": -424.572265625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -28.3098201751709, "rewards/margins": 19.752222061157227, "rewards/real": -8.557598114013672, "step": 3750 }, { "epoch": 2.42, "learning_rate": 1.0806950726017615e-07, "logits/generated": -1.4447872638702393, "logits/real": -1.6798250675201416, "logps/generated": -628.6717529296875, "logps/real": -394.5265808105469, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -28.025936126708984, "rewards/margins": 20.235803604125977, "rewards/real": -7.790134429931641, "step": 3760 }, { "epoch": 2.42, "learning_rate": 1.0687931444894072e-07, "logits/generated": -1.532257318496704, "logits/real": -1.6047290563583374, "logps/generated": -702.6226806640625, "logps/real": -396.7612609863281, "loss": 0.0028, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.113338470458984, "rewards/margins": 22.376953125, "rewards/real": -7.73638916015625, "step": 3770 }, { "epoch": 2.43, "learning_rate": 1.056891216377053e-07, "logits/generated": -1.4834333658218384, "logits/real": -1.5966551303863525, "logps/generated": -713.3619995117188, "logps/real": -440.79095458984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -29.308202743530273, "rewards/margins": 21.465810775756836, "rewards/real": -7.842390537261963, "step": 3780 }, { "epoch": 2.44, "learning_rate": 1.0449892882646988e-07, "logits/generated": -1.524183988571167, "logits/real": -1.608907699584961, "logps/generated": -710.7420654296875, "logps/real": -489.75665283203125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -29.5823917388916, "rewards/margins": 21.41897201538086, "rewards/real": -8.163420677185059, "step": 3790 }, { "epoch": 2.44, "learning_rate": 1.0330873601523445e-07, "logits/generated": -1.4193501472473145, "logits/real": -1.5115816593170166, "logps/generated": -760.3106689453125, "logps/real": -442.89898681640625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -30.945148468017578, "rewards/margins": 22.820213317871094, "rewards/real": -8.124935150146484, "step": 3800 }, { "epoch": 2.45, "learning_rate": 1.0211854320399904e-07, "logits/generated": -1.339179277420044, "logits/real": -1.4539612531661987, "logps/generated": -684.4679565429688, "logps/real": -414.834716796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -30.45599937438965, "rewards/margins": 22.370563507080078, "rewards/real": -8.085436820983887, "step": 3810 }, { "epoch": 2.46, "learning_rate": 1.0092835039276362e-07, "logits/generated": -1.4860432147979736, "logits/real": -1.602423071861267, "logps/generated": -673.7472534179688, "logps/real": -382.69085693359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -30.125503540039062, "rewards/margins": 23.682289123535156, "rewards/real": -6.443214416503906, "step": 3820 }, { "epoch": 2.46, "learning_rate": 9.973815758152821e-08, "logits/generated": -1.5302735567092896, "logits/real": -1.5981000661849976, "logps/generated": -623.1529541015625, "logps/real": -385.8594970703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -27.0240478515625, "rewards/margins": 20.35898208618164, "rewards/real": -6.665063381195068, "step": 3830 }, { "epoch": 2.47, "learning_rate": 9.854796477029279e-08, "logits/generated": -1.557305932044983, "logits/real": -1.6370735168457031, "logps/generated": -734.8917846679688, "logps/real": -417.2500915527344, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -30.936962127685547, "rewards/margins": 23.65032386779785, "rewards/real": -7.286639213562012, "step": 3840 }, { "epoch": 2.47, "learning_rate": 9.735777195905736e-08, "logits/generated": -1.5963976383209229, "logits/real": -1.646400809288025, "logps/generated": -621.7203979492188, "logps/real": -350.34271240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.403676986694336, "rewards/margins": 20.56957244873047, "rewards/real": -6.834105491638184, "step": 3850 }, { "epoch": 2.48, "learning_rate": 9.616757914782195e-08, "logits/generated": -1.6388921737670898, "logits/real": -1.738226294517517, "logps/generated": -675.3317260742188, "logps/real": -426.2586975097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.32644271850586, "rewards/margins": 20.16423797607422, "rewards/real": -7.162204742431641, "step": 3860 }, { "epoch": 2.49, "learning_rate": 9.497738633658653e-08, "logits/generated": -1.472826600074768, "logits/real": -1.6667178869247437, "logps/generated": -635.9212036132812, "logps/real": -377.46685791015625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/generated": -27.496978759765625, "rewards/margins": 20.11826515197754, "rewards/real": -7.3787126541137695, "step": 3870 }, { "epoch": 2.49, "learning_rate": 9.378719352535109e-08, "logits/generated": -1.475178837776184, "logits/real": -1.5875985622406006, "logps/generated": -704.7142944335938, "logps/real": -400.84521484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -29.746139526367188, "rewards/margins": 20.98758316040039, "rewards/real": -8.758556365966797, "step": 3880 }, { "epoch": 2.5, "learning_rate": 9.259700071411568e-08, "logits/generated": -1.5485643148422241, "logits/real": -1.5439013242721558, "logps/generated": -741.1964111328125, "logps/real": -408.63330078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -30.4178409576416, "rewards/margins": 23.00247573852539, "rewards/real": -7.415367126464844, "step": 3890 }, { "epoch": 2.51, "learning_rate": 9.140680790288026e-08, "logits/generated": -1.4215214252471924, "logits/real": -1.5626184940338135, "logps/generated": -667.1637573242188, "logps/real": -382.75238037109375, "loss": 0.007, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.500041961669922, "rewards/margins": 21.99424934387207, "rewards/real": -7.505797386169434, "step": 3900 }, { "epoch": 2.51, "learning_rate": 9.021661509164484e-08, "logits/generated": -1.4949233531951904, "logits/real": -1.5658118724822998, "logps/generated": -680.2150268554688, "logps/real": -470.9717712402344, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -28.54837989807129, "rewards/margins": 21.074321746826172, "rewards/real": -7.474058628082275, "step": 3910 }, { "epoch": 2.52, "learning_rate": 8.902642228040942e-08, "logits/generated": -1.4464380741119385, "logits/real": -1.5153313875198364, "logps/generated": -716.2271728515625, "logps/real": -413.4552307128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.981014251708984, "rewards/margins": 22.682100296020508, "rewards/real": -7.298914909362793, "step": 3920 }, { "epoch": 2.53, "learning_rate": 8.7836229469174e-08, "logits/generated": -1.4447425603866577, "logits/real": -1.56985342502594, "logps/generated": -757.6671142578125, "logps/real": -408.1660461425781, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -32.84424591064453, "rewards/margins": 24.225845336914062, "rewards/real": -8.618400573730469, "step": 3930 }, { "epoch": 2.53, "learning_rate": 8.664603665793858e-08, "logits/generated": -1.6285253763198853, "logits/real": -1.7806179523468018, "logps/generated": -676.8280029296875, "logps/real": -428.38995361328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -27.158214569091797, "rewards/margins": 20.443370819091797, "rewards/real": -6.714838981628418, "step": 3940 }, { "epoch": 2.54, "learning_rate": 8.545584384670317e-08, "logits/generated": -1.581805944442749, "logits/real": -1.6949933767318726, "logps/generated": -665.0941162109375, "logps/real": -385.68133544921875, "loss": 0.0063, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.212072372436523, "rewards/margins": 21.672542572021484, "rewards/real": -6.539525508880615, "step": 3950 }, { "epoch": 2.54, "learning_rate": 8.426565103546774e-08, "logits/generated": -1.4136435985565186, "logits/real": -1.5828819274902344, "logps/generated": -684.5712280273438, "logps/real": -426.00726318359375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -27.231287002563477, "rewards/margins": 20.253149032592773, "rewards/real": -6.978137016296387, "step": 3960 }, { "epoch": 2.55, "learning_rate": 8.307545822423233e-08, "logits/generated": -1.4507461786270142, "logits/real": -1.5602095127105713, "logps/generated": -665.6134033203125, "logps/real": -367.7745361328125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -27.6043701171875, "rewards/margins": 20.994644165039062, "rewards/real": -6.6097259521484375, "step": 3970 }, { "epoch": 2.56, "learning_rate": 8.18852654129969e-08, "logits/generated": -1.6215112209320068, "logits/real": -1.6469926834106445, "logps/generated": -666.5162963867188, "logps/real": -402.77227783203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -27.933940887451172, "rewards/margins": 20.598087310791016, "rewards/real": -7.335852146148682, "step": 3980 }, { "epoch": 2.56, "learning_rate": 8.069507260176147e-08, "logits/generated": -1.5119495391845703, "logits/real": -1.6007074117660522, "logps/generated": -637.7647094726562, "logps/real": -355.0984802246094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -27.141719818115234, "rewards/margins": 20.317394256591797, "rewards/real": -6.8243231773376465, "step": 3990 }, { "epoch": 2.57, "learning_rate": 7.950487979052606e-08, "logits/generated": -1.5609657764434814, "logits/real": -1.6627038717269897, "logps/generated": -708.6666259765625, "logps/real": -415.657470703125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -30.411911010742188, "rewards/margins": 22.86978530883789, "rewards/real": -7.542126655578613, "step": 4000 }, { "epoch": 2.58, "learning_rate": 7.831468697929064e-08, "logits/generated": -1.4143073558807373, "logits/real": -1.4475321769714355, "logps/generated": -650.6522216796875, "logps/real": -392.54345703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -25.745223999023438, "rewards/margins": 20.270931243896484, "rewards/real": -5.474294185638428, "step": 4010 }, { "epoch": 2.58, "learning_rate": 7.712449416805522e-08, "logits/generated": -1.4251785278320312, "logits/real": -1.529900074005127, "logps/generated": -684.9102172851562, "logps/real": -411.81207275390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.357402801513672, "rewards/margins": 19.91077995300293, "rewards/real": -6.4466233253479, "step": 4020 }, { "epoch": 2.59, "learning_rate": 7.59343013568198e-08, "logits/generated": -1.4817150831222534, "logits/real": -1.5895212888717651, "logps/generated": -647.0018310546875, "logps/real": -350.5855407714844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.597564697265625, "rewards/margins": 23.217004776000977, "rewards/real": -6.38055944442749, "step": 4030 }, { "epoch": 2.6, "learning_rate": 7.474410854558438e-08, "logits/generated": -1.5963512659072876, "logits/real": -1.637025237083435, "logps/generated": -596.042724609375, "logps/real": -327.88970947265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.45895767211914, "rewards/margins": 19.825878143310547, "rewards/real": -6.633078098297119, "step": 4040 }, { "epoch": 2.6, "learning_rate": 7.355391573434896e-08, "logits/generated": -1.638891577720642, "logits/real": -1.7146186828613281, "logps/generated": -687.0392456054688, "logps/real": -365.47528076171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -27.7447509765625, "rewards/margins": 20.9066219329834, "rewards/real": -6.838125705718994, "step": 4050 }, { "epoch": 2.61, "learning_rate": 7.236372292311355e-08, "logits/generated": -1.4594347476959229, "logits/real": -1.6632425785064697, "logps/generated": -696.3617553710938, "logps/real": -408.8171081542969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -27.696395874023438, "rewards/margins": 20.56673812866211, "rewards/real": -7.129660129547119, "step": 4060 }, { "epoch": 2.62, "learning_rate": 7.117353011187813e-08, "logits/generated": -1.4063997268676758, "logits/real": -1.4948168992996216, "logps/generated": -661.9642333984375, "logps/real": -434.97442626953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.784564971923828, "rewards/margins": 17.620466232299805, "rewards/real": -8.164094924926758, "step": 4070 }, { "epoch": 2.62, "learning_rate": 6.998333730064269e-08, "logits/generated": -1.438319444656372, "logits/real": -1.5320520401000977, "logps/generated": -679.9823608398438, "logps/real": -425.42413330078125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -28.671300888061523, "rewards/margins": 21.72645378112793, "rewards/real": -6.944846153259277, "step": 4080 }, { "epoch": 2.63, "learning_rate": 6.879314448940728e-08, "logits/generated": -1.365595817565918, "logits/real": -1.505392074584961, "logps/generated": -624.4268798828125, "logps/real": -387.62554931640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.39801597595215, "rewards/margins": 20.606674194335938, "rewards/real": -6.791341304779053, "step": 4090 }, { "epoch": 2.63, "learning_rate": 6.760295167817185e-08, "logits/generated": -1.6268279552459717, "logits/real": -1.6268571615219116, "logps/generated": -725.8529052734375, "logps/real": -467.90618896484375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -26.912384033203125, "rewards/margins": 20.454193115234375, "rewards/real": -6.45819616317749, "step": 4100 }, { "epoch": 2.64, "learning_rate": 6.641275886693644e-08, "logits/generated": -1.4818575382232666, "logits/real": -1.5495567321777344, "logps/generated": -605.5071411132812, "logps/real": -372.8101501464844, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -26.420181274414062, "rewards/margins": 19.029685974121094, "rewards/real": -7.390494346618652, "step": 4110 }, { "epoch": 2.65, "learning_rate": 6.522256605570102e-08, "logits/generated": -1.5707738399505615, "logits/real": -1.6774705648422241, "logps/generated": -636.1964111328125, "logps/real": -391.5029602050781, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -27.28677749633789, "rewards/margins": 21.367889404296875, "rewards/real": -5.918887138366699, "step": 4120 }, { "epoch": 2.65, "learning_rate": 6.40323732444656e-08, "logits/generated": -1.545809030532837, "logits/real": -1.5913245677947998, "logps/generated": -630.56640625, "logps/real": -422.04205322265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -24.0881404876709, "rewards/margins": 18.995553970336914, "rewards/real": -5.092586040496826, "step": 4130 }, { "epoch": 2.66, "learning_rate": 6.284218043323019e-08, "logits/generated": -1.59109365940094, "logits/real": -1.6737741231918335, "logps/generated": -642.7061157226562, "logps/real": -410.7779235839844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -25.49496841430664, "rewards/margins": 19.26400375366211, "rewards/real": -6.230964660644531, "step": 4140 }, { "epoch": 2.67, "learning_rate": 6.165198762199476e-08, "logits/generated": -1.633514404296875, "logits/real": -1.619410514831543, "logps/generated": -639.6582641601562, "logps/real": -386.4176330566406, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -24.464160919189453, "rewards/margins": 18.57771110534668, "rewards/real": -5.886451721191406, "step": 4150 }, { "epoch": 2.67, "learning_rate": 6.046179481075934e-08, "logits/generated": -1.4455702304840088, "logits/real": -1.7037875652313232, "logps/generated": -664.431640625, "logps/real": -400.7237548828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.038738250732422, "rewards/margins": 19.530742645263672, "rewards/real": -5.507995128631592, "step": 4160 }, { "epoch": 2.68, "learning_rate": 5.9271601999523916e-08, "logits/generated": -1.495924949645996, "logits/real": -1.673275351524353, "logps/generated": -645.5086669921875, "logps/real": -383.1039123535156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -25.089937210083008, "rewards/margins": 19.57440185546875, "rewards/real": -5.515534400939941, "step": 4170 }, { "epoch": 2.69, "learning_rate": 5.80814091882885e-08, "logits/generated": -1.43355393409729, "logits/real": -1.5794459581375122, "logps/generated": -708.1492309570312, "logps/real": -422.21917724609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -26.978546142578125, "rewards/margins": 21.33310317993164, "rewards/real": -5.645444393157959, "step": 4180 }, { "epoch": 2.69, "learning_rate": 5.689121637705308e-08, "logits/generated": -1.4713923931121826, "logits/real": -1.6138818264007568, "logps/generated": -568.523193359375, "logps/real": -390.0688171386719, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/generated": -23.389972686767578, "rewards/margins": 17.943958282470703, "rewards/real": -5.446010589599609, "step": 4190 }, { "epoch": 2.7, "learning_rate": 5.5701023565817666e-08, "logits/generated": -1.5411484241485596, "logits/real": -1.5925050973892212, "logps/generated": -566.3245849609375, "logps/real": -389.99395751953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -23.01774787902832, "rewards/margins": 18.14521598815918, "rewards/real": -4.872531890869141, "step": 4200 }, { "epoch": 2.71, "learning_rate": 5.4510830754582236e-08, "logits/generated": -1.5211843252182007, "logits/real": -1.6060224771499634, "logps/generated": -618.6573486328125, "logps/real": -381.83154296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -25.276790618896484, "rewards/margins": 20.00619888305664, "rewards/real": -5.270589351654053, "step": 4210 }, { "epoch": 2.71, "learning_rate": 5.332063794334682e-08, "logits/generated": -1.4857370853424072, "logits/real": -1.5991318225860596, "logps/generated": -628.9978637695312, "logps/real": -368.8316955566406, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -25.434362411499023, "rewards/margins": 20.840261459350586, "rewards/real": -4.59410285949707, "step": 4220 }, { "epoch": 2.72, "learning_rate": 5.21304451321114e-08, "logits/generated": -1.481233835220337, "logits/real": -1.616990089416504, "logps/generated": -612.8431396484375, "logps/real": -386.7208557128906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -23.88302993774414, "rewards/margins": 19.562503814697266, "rewards/real": -4.32052755355835, "step": 4230 }, { "epoch": 2.72, "learning_rate": 5.0940252320875985e-08, "logits/generated": -1.543163537979126, "logits/real": -1.64615797996521, "logps/generated": -642.1202392578125, "logps/real": -387.37042236328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -22.703163146972656, "rewards/margins": 18.479257583618164, "rewards/real": -4.223905086517334, "step": 4240 }, { "epoch": 2.73, "learning_rate": 4.975005950964056e-08, "logits/generated": -1.5782445669174194, "logits/real": -1.6280380487442017, "logps/generated": -670.2909545898438, "logps/real": -409.8173828125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/generated": -25.616764068603516, "rewards/margins": 20.43697738647461, "rewards/real": -5.1797871589660645, "step": 4250 }, { "epoch": 2.74, "learning_rate": 4.855986669840514e-08, "logits/generated": -1.5900815725326538, "logits/real": -1.621788740158081, "logps/generated": -621.515625, "logps/real": -400.93658447265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -24.718387603759766, "rewards/margins": 18.437381744384766, "rewards/real": -6.281005859375, "step": 4260 }, { "epoch": 2.74, "learning_rate": 4.736967388716972e-08, "logits/generated": -1.5760804414749146, "logits/real": -1.6673088073730469, "logps/generated": -629.8677978515625, "logps/real": -367.3181457519531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.90537452697754, "rewards/margins": 19.712932586669922, "rewards/real": -6.192440509796143, "step": 4270 }, { "epoch": 2.75, "learning_rate": 4.61794810759343e-08, "logits/generated": -1.5383670330047607, "logits/real": -1.6774461269378662, "logps/generated": -668.8092041015625, "logps/real": -413.6837463378906, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -24.660079956054688, "rewards/margins": 20.008989334106445, "rewards/real": -4.65109395980835, "step": 4280 }, { "epoch": 2.76, "learning_rate": 4.498928826469888e-08, "logits/generated": -1.516428828239441, "logits/real": -1.6732898950576782, "logps/generated": -604.8695068359375, "logps/real": -424.62481689453125, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.037944793701172, "rewards/margins": 18.540569305419922, "rewards/real": -4.497374534606934, "step": 4290 }, { "epoch": 2.76, "learning_rate": 4.3799095453463464e-08, "logits/generated": -1.5560497045516968, "logits/real": -1.6302626132965088, "logps/generated": -725.353271484375, "logps/real": -401.71990966796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.72428321838379, "rewards/margins": 20.166976928710938, "rewards/real": -5.557308197021484, "step": 4300 }, { "epoch": 2.77, "learning_rate": 4.2608902642228033e-08, "logits/generated": -1.5148546695709229, "logits/real": -1.635724425315857, "logps/generated": -560.277587890625, "logps/real": -350.01739501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -22.92128562927246, "rewards/margins": 17.64234733581543, "rewards/real": -5.2789411544799805, "step": 4310 }, { "epoch": 2.78, "learning_rate": 4.1418709830992617e-08, "logits/generated": -1.5678379535675049, "logits/real": -1.6492674350738525, "logps/generated": -651.1310424804688, "logps/real": -392.8611755371094, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -23.585779190063477, "rewards/margins": 17.906591415405273, "rewards/real": -5.679187297821045, "step": 4320 }, { "epoch": 2.78, "learning_rate": 4.02285170197572e-08, "logits/generated": -1.5977767705917358, "logits/real": -1.6969823837280273, "logps/generated": -596.7791748046875, "logps/real": -366.9782409667969, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -24.06852149963379, "rewards/margins": 18.731252670288086, "rewards/real": -5.3372673988342285, "step": 4330 }, { "epoch": 2.79, "learning_rate": 3.903832420852178e-08, "logits/generated": -1.5980104207992554, "logits/real": -1.624751329421997, "logps/generated": -634.3136596679688, "logps/real": -421.5874938964844, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -25.012256622314453, "rewards/margins": 19.190711975097656, "rewards/real": -5.82154655456543, "step": 4340 }, { "epoch": 2.8, "learning_rate": 3.784813139728636e-08, "logits/generated": -1.544721007347107, "logits/real": -1.617582082748413, "logps/generated": -652.9552001953125, "logps/real": -342.71917724609375, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.350351333618164, "rewards/margins": 22.945491790771484, "rewards/real": -4.404857635498047, "step": 4350 }, { "epoch": 2.8, "learning_rate": 3.6657938586050936e-08, "logits/generated": -1.4935457706451416, "logits/real": -1.6021337509155273, "logps/generated": -588.6372680664062, "logps/real": -402.85235595703125, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.209640502929688, "rewards/margins": 19.01520347595215, "rewards/real": -4.19443416595459, "step": 4360 }, { "epoch": 2.81, "learning_rate": 3.546774577481552e-08, "logits/generated": -1.4029728174209595, "logits/real": -1.4847666025161743, "logps/generated": -645.6001586914062, "logps/real": -414.8529357910156, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -25.182071685791016, "rewards/margins": 19.215055465698242, "rewards/real": -5.967015266418457, "step": 4370 }, { "epoch": 2.81, "learning_rate": 3.42775529635801e-08, "logits/generated": -1.5726209878921509, "logits/real": -1.7626521587371826, "logps/generated": -664.4998779296875, "logps/real": -424.19140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -24.57777976989746, "rewards/margins": 19.955883026123047, "rewards/real": -4.621894359588623, "step": 4380 }, { "epoch": 2.82, "learning_rate": 3.308736015234468e-08, "logits/generated": -1.5751293897628784, "logits/real": -1.6218827962875366, "logps/generated": -606.050048828125, "logps/real": -415.70599365234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -23.84659767150879, "rewards/margins": 18.291515350341797, "rewards/real": -5.555081844329834, "step": 4390 }, { "epoch": 2.83, "learning_rate": 3.189716734110926e-08, "logits/generated": -1.5750287771224976, "logits/real": -1.6524326801300049, "logps/generated": -662.5819702148438, "logps/real": -409.240966796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -24.22734832763672, "rewards/margins": 19.554428100585938, "rewards/real": -4.6729207038879395, "step": 4400 }, { "epoch": 2.83, "learning_rate": 3.070697452987384e-08, "logits/generated": -1.434731125831604, "logits/real": -1.5952690839767456, "logps/generated": -627.177001953125, "logps/real": -401.0657653808594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -24.574573516845703, "rewards/margins": 19.57662582397461, "rewards/real": -4.99794864654541, "step": 4410 }, { "epoch": 2.84, "learning_rate": 2.9516781718638418e-08, "logits/generated": -1.5248663425445557, "logits/real": -1.643877387046814, "logps/generated": -664.0775146484375, "logps/real": -432.7522888183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.960874557495117, "rewards/margins": 19.798603057861328, "rewards/real": -5.162272930145264, "step": 4420 }, { "epoch": 2.85, "learning_rate": 2.8326588907402998e-08, "logits/generated": -1.6145492792129517, "logits/real": -1.6489204168319702, "logps/generated": -638.4390869140625, "logps/real": -405.95135498046875, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.591712951660156, "rewards/margins": 19.004825592041016, "rewards/real": -5.586886882781982, "step": 4430 }, { "epoch": 2.85, "learning_rate": 2.7136396096167577e-08, "logits/generated": -1.5042235851287842, "logits/real": -1.6028741598129272, "logps/generated": -621.9290161132812, "logps/real": -364.36456298828125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -26.292139053344727, "rewards/margins": 20.4296817779541, "rewards/real": -5.862456321716309, "step": 4440 }, { "epoch": 2.86, "learning_rate": 2.5946203284932157e-08, "logits/generated": -1.5380438566207886, "logits/real": -1.6364552974700928, "logps/generated": -618.8900756835938, "logps/real": -363.16387939453125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -26.624120712280273, "rewards/margins": 20.857669830322266, "rewards/real": -5.766448974609375, "step": 4450 }, { "epoch": 2.87, "learning_rate": 2.475601047369674e-08, "logits/generated": -1.3604224920272827, "logits/real": -1.5043797492980957, "logps/generated": -601.212158203125, "logps/real": -355.81329345703125, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.482114791870117, "rewards/margins": 20.78329086303711, "rewards/real": -4.698822021484375, "step": 4460 }, { "epoch": 2.87, "learning_rate": 2.3565817662461317e-08, "logits/generated": -1.4521681070327759, "logits/real": -1.6199442148208618, "logps/generated": -686.1697387695312, "logps/real": -380.40020751953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -28.074283599853516, "rewards/margins": 22.777790069580078, "rewards/real": -5.296494007110596, "step": 4470 }, { "epoch": 2.88, "learning_rate": 2.2375624851225897e-08, "logits/generated": -1.5777462720870972, "logits/real": -1.625754714012146, "logps/generated": -577.4441528320312, "logps/real": -390.99676513671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -22.376705169677734, "rewards/margins": 17.67047119140625, "rewards/real": -4.706236839294434, "step": 4480 }, { "epoch": 2.89, "learning_rate": 2.1185432039990476e-08, "logits/generated": -1.5715150833129883, "logits/real": -1.6618340015411377, "logps/generated": -650.3167724609375, "logps/real": -370.7663269042969, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -26.569076538085938, "rewards/margins": 20.809871673583984, "rewards/real": -5.7592034339904785, "step": 4490 }, { "epoch": 2.89, "learning_rate": 1.9995239228755056e-08, "logits/generated": -1.4017701148986816, "logits/real": -1.5704150199890137, "logps/generated": -623.8563232421875, "logps/real": -330.9757080078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -27.09404945373535, "rewards/margins": 21.123863220214844, "rewards/real": -5.970187187194824, "step": 4500 }, { "epoch": 2.9, "learning_rate": 1.880504641751964e-08, "logits/generated": -1.569045066833496, "logits/real": -1.689866065979004, "logps/generated": -648.3955078125, "logps/real": -410.296630859375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -24.370418548583984, "rewards/margins": 19.45819091796875, "rewards/real": -4.912228584289551, "step": 4510 }, { "epoch": 2.9, "learning_rate": 1.7614853606284216e-08, "logits/generated": -1.5325770378112793, "logits/real": -1.65109384059906, "logps/generated": -704.2975463867188, "logps/real": -418.112060546875, "loss": 0.0028, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.44858169555664, "rewards/margins": 21.82914161682129, "rewards/real": -4.619443893432617, "step": 4520 }, { "epoch": 2.91, "learning_rate": 1.64246607950488e-08, "logits/generated": -1.569603681564331, "logits/real": -1.6199992895126343, "logps/generated": -633.3504028320312, "logps/real": -405.2867126464844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -24.51732635498047, "rewards/margins": 19.26373863220215, "rewards/real": -5.253589153289795, "step": 4530 }, { "epoch": 2.92, "learning_rate": 1.523446798381338e-08, "logits/generated": -1.538140892982483, "logits/real": -1.5134851932525635, "logps/generated": -599.3333740234375, "logps/real": -339.6556091308594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.23720932006836, "rewards/margins": 20.19225311279297, "rewards/real": -6.044954299926758, "step": 4540 }, { "epoch": 2.92, "learning_rate": 1.4044275172577957e-08, "logits/generated": -1.576467752456665, "logits/real": -1.7410484552383423, "logps/generated": -633.7058715820312, "logps/real": -355.918212890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -26.02840232849121, "rewards/margins": 21.529172897338867, "rewards/real": -4.499229907989502, "step": 4550 }, { "epoch": 2.93, "learning_rate": 1.2854082361342537e-08, "logits/generated": -1.480687141418457, "logits/real": -1.6588733196258545, "logps/generated": -614.5526733398438, "logps/real": -412.12744140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -24.828638076782227, "rewards/margins": 19.214387893676758, "rewards/real": -5.614253997802734, "step": 4560 }, { "epoch": 2.94, "learning_rate": 1.1663889550107118e-08, "logits/generated": -1.5496861934661865, "logits/real": -1.7427030801773071, "logps/generated": -634.1248168945312, "logps/real": -392.64141845703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -26.263072967529297, "rewards/margins": 21.099870681762695, "rewards/real": -5.163203716278076, "step": 4570 }, { "epoch": 2.94, "learning_rate": 1.0473696738871698e-08, "logits/generated": -1.5476195812225342, "logits/real": -1.6109368801116943, "logps/generated": -661.4473876953125, "logps/real": -491.182373046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.115245819091797, "rewards/margins": 19.50821304321289, "rewards/real": -6.607035160064697, "step": 4580 }, { "epoch": 2.95, "learning_rate": 9.283503927636276e-09, "logits/generated": -1.514695405960083, "logits/real": -1.6349430084228516, "logps/generated": -645.2971801757812, "logps/real": -423.22979736328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -25.907445907592773, "rewards/margins": 20.990873336791992, "rewards/real": -4.916577339172363, "step": 4590 }, { "epoch": 2.96, "learning_rate": 8.093311116400856e-09, "logits/generated": -1.4776127338409424, "logits/real": -1.5790631771087646, "logps/generated": -644.6243286132812, "logps/real": -405.47015380859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -25.59290885925293, "rewards/margins": 19.970502853393555, "rewards/real": -5.6224045753479, "step": 4600 }, { "epoch": 2.96, "learning_rate": 6.903118305165436e-09, "logits/generated": -1.551004409790039, "logits/real": -1.6629527807235718, "logps/generated": -686.5955810546875, "logps/real": -435.12750244140625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -25.463531494140625, "rewards/margins": 19.223169326782227, "rewards/real": -6.240363597869873, "step": 4610 }, { "epoch": 2.97, "learning_rate": 5.712925493930016e-09, "logits/generated": -1.4893418550491333, "logits/real": -1.675402283668518, "logps/generated": -659.8289184570312, "logps/real": -389.3727722167969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -27.647533416748047, "rewards/margins": 21.779216766357422, "rewards/real": -5.868315696716309, "step": 4620 }, { "epoch": 2.98, "learning_rate": 4.522732682694597e-09, "logits/generated": -1.5567461252212524, "logits/real": -1.7037324905395508, "logps/generated": -679.4031982421875, "logps/real": -418.8092346191406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -23.965381622314453, "rewards/margins": 18.415084838867188, "rewards/real": -5.550297737121582, "step": 4630 }, { "epoch": 2.98, "learning_rate": 3.332539871459176e-09, "logits/generated": -1.3903148174285889, "logits/real": -1.492148756980896, "logps/generated": -663.8366088867188, "logps/real": -389.20098876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -27.28782081604004, "rewards/margins": 21.74622344970703, "rewards/real": -5.541598796844482, "step": 4640 }, { "epoch": 2.99, "learning_rate": 2.1423470602237564e-09, "logits/generated": -1.5899990797042847, "logits/real": -1.632591962814331, "logps/generated": -686.7926025390625, "logps/real": -420.3905334472656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.265411376953125, "rewards/margins": 21.27739143371582, "rewards/real": -4.988020420074463, "step": 4650 }, { "epoch": 2.99, "learning_rate": 9.521542489883362e-10, "logits/generated": -1.5567301511764526, "logits/real": -1.6518385410308838, "logps/generated": -666.6358642578125, "logps/real": -394.54815673828125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/generated": -26.553237915039062, "rewards/margins": 22.035541534423828, "rewards/real": -4.517697811126709, "step": 4660 }, { "epoch": 3.0, "step": 4668, "total_flos": 0.0, "train_loss": 0.04980033338522684, "train_runtime": 39160.4052, "train_samples_per_second": 3.814, "train_steps_per_second": 0.119 } ], "logging_steps": 10, "max_steps": 4668, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }