test-spin-iter0 / trainer_state.json
Yifan Wang
Update from wang5617
d8fb1a5
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 100,
"global_step": 4668,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.0706638115631692e-09,
"logits/generated": -3.0364484786987305,
"logits/real": -3.0630810260772705,
"logps/generated": -251.72409057617188,
"logps/real": -237.75723266601562,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.070663811563169e-08,
"logits/generated": -2.9856934547424316,
"logits/real": -2.989187240600586,
"logps/generated": -390.6001892089844,
"logps/real": -373.385498046875,
"loss": 0.6943,
"rewards/accuracies": 0.4861111044883728,
"rewards/generated": -0.004033928737044334,
"rewards/margins": 0.010273342952132225,
"rewards/real": 0.006239414215087891,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 2.141327623126338e-08,
"logits/generated": -3.0185017585754395,
"logits/real": -2.9879310131073,
"logps/generated": -393.1386413574219,
"logps/real": -348.47198486328125,
"loss": 0.6842,
"rewards/accuracies": 0.574999988079071,
"rewards/generated": 0.009772378951311111,
"rewards/margins": 0.008250057697296143,
"rewards/real": 0.018022436648607254,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 3.2119914346895076e-08,
"logits/generated": -3.0177969932556152,
"logits/real": -3.0238332748413086,
"logps/generated": -361.01361083984375,
"logps/real": -317.5545654296875,
"loss": 0.666,
"rewards/accuracies": 0.7124999761581421,
"rewards/generated": 0.04016115143895149,
"rewards/margins": 0.07257900387048721,
"rewards/real": 0.112740159034729,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 4.282655246252676e-08,
"logits/generated": -3.012861967086792,
"logits/real": -3.010136604309082,
"logps/generated": -404.3400573730469,
"logps/real": -321.47833251953125,
"loss": 0.6148,
"rewards/accuracies": 0.800000011920929,
"rewards/generated": 0.1161263957619667,
"rewards/margins": 0.16384394466876984,
"rewards/real": 0.27997034788131714,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 5.353319057815846e-08,
"logits/generated": -3.006298065185547,
"logits/real": -2.9836604595184326,
"logps/generated": -386.39251708984375,
"logps/real": -344.08502197265625,
"loss": 0.5534,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": 0.17066331207752228,
"rewards/margins": 0.3133729100227356,
"rewards/real": 0.48403626680374146,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 6.423982869379015e-08,
"logits/generated": -2.9997777938842773,
"logits/real": -2.991501808166504,
"logps/generated": -408.845703125,
"logps/real": -346.44403076171875,
"loss": 0.4799,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": 0.11928486824035645,
"rewards/margins": 0.6128198504447937,
"rewards/real": 0.7321046590805054,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 7.494646680942184e-08,
"logits/generated": -2.9945003986358643,
"logits/real": -2.9864401817321777,
"logps/generated": -417.6007385253906,
"logps/real": -394.57769775390625,
"loss": 0.412,
"rewards/accuracies": 0.824999988079071,
"rewards/generated": -0.056093405932188034,
"rewards/margins": 1.043709635734558,
"rewards/real": 0.9876161813735962,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 8.565310492505352e-08,
"logits/generated": -2.996636390686035,
"logits/real": -2.982595682144165,
"logps/generated": -379.52105712890625,
"logps/real": -327.92724609375,
"loss": 0.3944,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -0.12083463370800018,
"rewards/margins": 0.9464915990829468,
"rewards/real": 0.8256568908691406,
"step": 80
},
{
"epoch": 0.06,
"learning_rate": 9.635974304068522e-08,
"logits/generated": -2.9587833881378174,
"logits/real": -2.9507949352264404,
"logps/generated": -392.19256591796875,
"logps/real": -346.1636962890625,
"loss": 0.3626,
"rewards/accuracies": 0.875,
"rewards/generated": -0.1786222904920578,
"rewards/margins": 1.2095777988433838,
"rewards/real": 1.0309556722640991,
"step": 90
},
{
"epoch": 0.06,
"learning_rate": 1.0706638115631692e-07,
"logits/generated": -2.9543347358703613,
"logits/real": -2.962299346923828,
"logps/generated": -359.78009033203125,
"logps/real": -339.92010498046875,
"loss": 0.3373,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -0.21250668168067932,
"rewards/margins": 1.3779737949371338,
"rewards/real": 1.1654671430587769,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 1.177730192719486e-07,
"logits/generated": -2.9344518184661865,
"logits/real": -2.9429378509521484,
"logps/generated": -400.2679748535156,
"logps/real": -311.1820983886719,
"loss": 0.28,
"rewards/accuracies": 0.875,
"rewards/generated": -0.47659602761268616,
"rewards/margins": 1.5711917877197266,
"rewards/real": 1.0945957899093628,
"step": 110
},
{
"epoch": 0.08,
"learning_rate": 1.284796573875803e-07,
"logits/generated": -2.9682908058166504,
"logits/real": -2.9808101654052734,
"logps/generated": -382.69219970703125,
"logps/real": -332.9756774902344,
"loss": 0.261,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.8699382543563843,
"rewards/margins": 1.8869482278823853,
"rewards/real": 1.017009973526001,
"step": 120
},
{
"epoch": 0.08,
"learning_rate": 1.3918629550321198e-07,
"logits/generated": -2.950186014175415,
"logits/real": -2.9397127628326416,
"logps/generated": -397.447021484375,
"logps/real": -314.9403076171875,
"loss": 0.2336,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -1.2718974351882935,
"rewards/margins": 2.2060093879699707,
"rewards/real": 0.9341122508049011,
"step": 130
},
{
"epoch": 0.09,
"learning_rate": 1.4989293361884367e-07,
"logits/generated": -2.942518711090088,
"logits/real": -2.9479198455810547,
"logps/generated": -415.5738220214844,
"logps/real": -311.505615234375,
"loss": 0.2199,
"rewards/accuracies": 0.9375,
"rewards/generated": -1.8576176166534424,
"rewards/margins": 2.6989645957946777,
"rewards/real": 0.8413470387458801,
"step": 140
},
{
"epoch": 0.1,
"learning_rate": 1.6059957173447535e-07,
"logits/generated": -2.934556007385254,
"logits/real": -2.9276509284973145,
"logps/generated": -408.669189453125,
"logps/real": -376.3493957519531,
"loss": 0.2065,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -1.4676240682601929,
"rewards/margins": 2.4751994609832764,
"rewards/real": 1.007575273513794,
"step": 150
},
{
"epoch": 0.1,
"learning_rate": 1.7130620985010704e-07,
"logits/generated": -2.9072623252868652,
"logits/real": -2.911750078201294,
"logps/generated": -457.24188232421875,
"logps/real": -372.0235595703125,
"loss": 0.173,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -2.4295992851257324,
"rewards/margins": 3.2849929332733154,
"rewards/real": 0.8553940057754517,
"step": 160
},
{
"epoch": 0.11,
"learning_rate": 1.8201284796573874e-07,
"logits/generated": -2.930424928665161,
"logits/real": -2.917999744415283,
"logps/generated": -365.8371887207031,
"logps/real": -290.6273498535156,
"loss": 0.1936,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -2.619870662689209,
"rewards/margins": 2.9378836154937744,
"rewards/real": 0.3180127739906311,
"step": 170
},
{
"epoch": 0.12,
"learning_rate": 1.9271948608137044e-07,
"logits/generated": -2.9163429737091064,
"logits/real": -2.91850209236145,
"logps/generated": -413.27288818359375,
"logps/real": -344.4951171875,
"loss": 0.1629,
"rewards/accuracies": 0.9375,
"rewards/generated": -3.2495665550231934,
"rewards/margins": 3.9354729652404785,
"rewards/real": 0.6859063506126404,
"step": 180
},
{
"epoch": 0.12,
"learning_rate": 2.0342612419700214e-07,
"logits/generated": -2.914929151535034,
"logits/real": -2.8999643325805664,
"logps/generated": -455.07135009765625,
"logps/real": -330.7913818359375,
"loss": 0.1497,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -3.547805070877075,
"rewards/margins": 3.8215222358703613,
"rewards/real": 0.27371746301651,
"step": 190
},
{
"epoch": 0.13,
"learning_rate": 2.1413276231263384e-07,
"logits/generated": -2.898618698120117,
"logits/real": -2.8733317852020264,
"logps/generated": -466.3199768066406,
"logps/real": -360.9929504394531,
"loss": 0.1471,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.219028472900391,
"rewards/margins": 4.406793594360352,
"rewards/real": 0.18776562809944153,
"step": 200
},
{
"epoch": 0.13,
"learning_rate": 2.248394004282655e-07,
"logits/generated": -2.8917415142059326,
"logits/real": -2.8755955696105957,
"logps/generated": -410.87451171875,
"logps/real": -356.9337463378906,
"loss": 0.1496,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -4.3014631271362305,
"rewards/margins": 4.1496405601501465,
"rewards/real": -0.1518225222826004,
"step": 210
},
{
"epoch": 0.14,
"learning_rate": 2.355460385438972e-07,
"logits/generated": -2.8925869464874268,
"logits/real": -2.8660550117492676,
"logps/generated": -421.72430419921875,
"logps/real": -346.0505065917969,
"loss": 0.1502,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -4.356566429138184,
"rewards/margins": 4.137876987457275,
"rewards/real": -0.21868903934955597,
"step": 220
},
{
"epoch": 0.15,
"learning_rate": 2.462526766595289e-07,
"logits/generated": -2.891606569290161,
"logits/real": -2.8733668327331543,
"logps/generated": -414.7857360839844,
"logps/real": -352.6590881347656,
"loss": 0.1554,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.677205562591553,
"rewards/margins": 4.315895080566406,
"rewards/real": -0.3613104224205017,
"step": 230
},
{
"epoch": 0.15,
"learning_rate": 2.569593147751606e-07,
"logits/generated": -2.8873989582061768,
"logits/real": -2.8477070331573486,
"logps/generated": -446.8787536621094,
"logps/real": -355.8851013183594,
"loss": 0.1372,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -4.631860256195068,
"rewards/margins": 4.302509784698486,
"rewards/real": -0.3293505311012268,
"step": 240
},
{
"epoch": 0.16,
"learning_rate": 2.676659528907923e-07,
"logits/generated": -2.8453519344329834,
"logits/real": -2.813788652420044,
"logps/generated": -419.0545959472656,
"logps/real": -332.58184814453125,
"loss": 0.1234,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -4.903704643249512,
"rewards/margins": 4.795269966125488,
"rewards/real": -0.10843384265899658,
"step": 250
},
{
"epoch": 0.17,
"learning_rate": 2.7837259100642395e-07,
"logits/generated": -2.826523542404175,
"logits/real": -2.7952752113342285,
"logps/generated": -454.9879455566406,
"logps/real": -370.6460876464844,
"loss": 0.1294,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -5.665987491607666,
"rewards/margins": 4.868983268737793,
"rewards/real": -0.7970041632652283,
"step": 260
},
{
"epoch": 0.17,
"learning_rate": 2.890792291220557e-07,
"logits/generated": -2.7900607585906982,
"logits/real": -2.7720229625701904,
"logps/generated": -477.99053955078125,
"logps/real": -324.0495300292969,
"loss": 0.138,
"rewards/accuracies": 0.9375,
"rewards/generated": -6.587684631347656,
"rewards/margins": 5.73649787902832,
"rewards/real": -0.8511865735054016,
"step": 270
},
{
"epoch": 0.18,
"learning_rate": 2.9978586723768735e-07,
"logits/generated": -2.7956130504608154,
"logits/real": -2.7502362728118896,
"logps/generated": -441.34320068359375,
"logps/real": -338.3302917480469,
"loss": 0.1215,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -5.030495643615723,
"rewards/margins": 4.927591800689697,
"rewards/real": -0.10290361940860748,
"step": 280
},
{
"epoch": 0.19,
"learning_rate": 3.1049250535331905e-07,
"logits/generated": -2.776495933532715,
"logits/real": -2.7354507446289062,
"logps/generated": -443.36163330078125,
"logps/real": -347.2231750488281,
"loss": 0.122,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -5.4566240310668945,
"rewards/margins": 5.132528781890869,
"rewards/real": -0.32409486174583435,
"step": 290
},
{
"epoch": 0.19,
"learning_rate": 3.211991434689507e-07,
"logits/generated": -2.705543041229248,
"logits/real": -2.6740341186523438,
"logps/generated": -402.8146667480469,
"logps/real": -273.10748291015625,
"loss": 0.0935,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -6.659946441650391,
"rewards/margins": 5.823763847351074,
"rewards/real": -0.8361822962760925,
"step": 300
},
{
"epoch": 0.2,
"learning_rate": 3.3190578158458244e-07,
"logits/generated": -2.7414534091949463,
"logits/real": -2.694044589996338,
"logps/generated": -466.59375,
"logps/real": -307.7261657714844,
"loss": 0.1304,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.890405178070068,
"rewards/margins": 6.506340980529785,
"rewards/real": -0.3840644359588623,
"step": 310
},
{
"epoch": 0.21,
"learning_rate": 3.426124197002141e-07,
"logits/generated": -2.7618134021759033,
"logits/real": -2.7209994792938232,
"logps/generated": -477.1549377441406,
"logps/real": -336.30950927734375,
"loss": 0.1194,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -7.347966194152832,
"rewards/margins": 6.844850063323975,
"rewards/real": -0.5031148791313171,
"step": 320
},
{
"epoch": 0.21,
"learning_rate": 3.533190578158458e-07,
"logits/generated": -2.769962787628174,
"logits/real": -2.6862077713012695,
"logps/generated": -476.4065856933594,
"logps/real": -356.8155517578125,
"loss": 0.1259,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -6.444831848144531,
"rewards/margins": 6.0999369621276855,
"rewards/real": -0.3448948264122009,
"step": 330
},
{
"epoch": 0.22,
"learning_rate": 3.640256959314775e-07,
"logits/generated": -2.730198383331299,
"logits/real": -2.729130268096924,
"logps/generated": -452.85626220703125,
"logps/real": -316.28997802734375,
"loss": 0.1254,
"rewards/accuracies": 0.9375,
"rewards/generated": -6.9878082275390625,
"rewards/margins": 6.373122215270996,
"rewards/real": -0.6146861910820007,
"step": 340
},
{
"epoch": 0.22,
"learning_rate": 3.747323340471092e-07,
"logits/generated": -2.700066566467285,
"logits/real": -2.6724190711975098,
"logps/generated": -458.28857421875,
"logps/real": -329.33203125,
"loss": 0.1234,
"rewards/accuracies": 0.9375,
"rewards/generated": -5.48982048034668,
"rewards/margins": 5.636635780334473,
"rewards/real": 0.14681576192378998,
"step": 350
},
{
"epoch": 0.23,
"learning_rate": 3.854389721627409e-07,
"logits/generated": -2.709381103515625,
"logits/real": -2.6651294231414795,
"logps/generated": -462.7777404785156,
"logps/real": -367.77740478515625,
"loss": 0.0883,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.926550388336182,
"rewards/margins": 6.700352668762207,
"rewards/real": -0.22619831562042236,
"step": 360
},
{
"epoch": 0.24,
"learning_rate": 3.961456102783726e-07,
"logits/generated": -2.7424886226654053,
"logits/real": -2.6503500938415527,
"logps/generated": -489.62542724609375,
"logps/real": -340.3368225097656,
"loss": 0.1383,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -7.2433366775512695,
"rewards/margins": 6.5567626953125,
"rewards/real": -0.6865738034248352,
"step": 370
},
{
"epoch": 0.24,
"learning_rate": 4.068522483940043e-07,
"logits/generated": -2.6518828868865967,
"logits/real": -2.578998327255249,
"logps/generated": -491.3388671875,
"logps/real": -354.8604736328125,
"loss": 0.1068,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -8.11082935333252,
"rewards/margins": 6.844748020172119,
"rewards/real": -1.2660824060440063,
"step": 380
},
{
"epoch": 0.25,
"learning_rate": 4.175588865096359e-07,
"logits/generated": -2.6482059955596924,
"logits/real": -2.593445301055908,
"logps/generated": -458.67864990234375,
"logps/real": -341.2268981933594,
"loss": 0.1243,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.110146999359131,
"rewards/margins": 6.503669738769531,
"rewards/real": -0.6064783334732056,
"step": 390
},
{
"epoch": 0.26,
"learning_rate": 4.282655246252677e-07,
"logits/generated": -2.6590518951416016,
"logits/real": -2.589247703552246,
"logps/generated": -485.0723571777344,
"logps/real": -371.298828125,
"loss": 0.1143,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -7.317061424255371,
"rewards/margins": 6.85882568359375,
"rewards/real": -0.4582356810569763,
"step": 400
},
{
"epoch": 0.26,
"learning_rate": 4.389721627408993e-07,
"logits/generated": -2.657744884490967,
"logits/real": -2.634624481201172,
"logps/generated": -440.25396728515625,
"logps/real": -309.36871337890625,
"loss": 0.1418,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -6.629510402679443,
"rewards/margins": 6.084707260131836,
"rewards/real": -0.5448042154312134,
"step": 410
},
{
"epoch": 0.27,
"learning_rate": 4.49678800856531e-07,
"logits/generated": -2.7457222938537598,
"logits/real": -2.698697566986084,
"logps/generated": -477.81475830078125,
"logps/real": -359.94305419921875,
"loss": 0.118,
"rewards/accuracies": 0.9375,
"rewards/generated": -5.93577241897583,
"rewards/margins": 6.0500030517578125,
"rewards/real": 0.11423077434301376,
"step": 420
},
{
"epoch": 0.28,
"learning_rate": 4.603854389721627e-07,
"logits/generated": -2.7255027294158936,
"logits/real": -2.6430881023406982,
"logps/generated": -491.6402282714844,
"logps/real": -329.3466796875,
"loss": 0.1015,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.748249053955078,
"rewards/margins": 6.983546257019043,
"rewards/real": 0.23529770970344543,
"step": 430
},
{
"epoch": 0.28,
"learning_rate": 4.710920770877944e-07,
"logits/generated": -2.727783679962158,
"logits/real": -2.6885695457458496,
"logps/generated": -479.18585205078125,
"logps/real": -303.29132080078125,
"loss": 0.1195,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -7.20239782333374,
"rewards/margins": 6.955704689025879,
"rewards/real": -0.24669349193572998,
"step": 440
},
{
"epoch": 0.29,
"learning_rate": 4.817987152034261e-07,
"logits/generated": -2.704719305038452,
"logits/real": -2.672463893890381,
"logps/generated": -448.4974060058594,
"logps/real": -331.91583251953125,
"loss": 0.1325,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -6.272010326385498,
"rewards/margins": 5.807042121887207,
"rewards/real": -0.4649685323238373,
"step": 450
},
{
"epoch": 0.3,
"learning_rate": 4.925053533190578e-07,
"logits/generated": -2.7187066078186035,
"logits/real": -2.650846004486084,
"logps/generated": -499.6753845214844,
"logps/real": -356.98614501953125,
"loss": 0.1159,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -8.36271858215332,
"rewards/margins": 7.270302772521973,
"rewards/real": -1.092416524887085,
"step": 460
},
{
"epoch": 0.3,
"learning_rate": 4.996429421566293e-07,
"logits/generated": -2.65824556350708,
"logits/real": -2.5810179710388184,
"logps/generated": -515.4708862304688,
"logps/real": -335.44573974609375,
"loss": 0.1184,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -9.19658088684082,
"rewards/margins": 8.151226043701172,
"rewards/real": -1.0453550815582275,
"step": 470
},
{
"epoch": 0.31,
"learning_rate": 4.98452749345394e-07,
"logits/generated": -2.572777032852173,
"logits/real": -2.529254913330078,
"logps/generated": -461.7464294433594,
"logps/real": -406.6604309082031,
"loss": 0.0779,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -8.338438034057617,
"rewards/margins": 7.123196601867676,
"rewards/real": -1.2152409553527832,
"step": 480
},
{
"epoch": 0.31,
"learning_rate": 4.972625565341585e-07,
"logits/generated": -2.5586600303649902,
"logits/real": -2.4877161979675293,
"logps/generated": -430.3533630371094,
"logps/real": -323.3705749511719,
"loss": 0.0999,
"rewards/accuracies": 0.9375,
"rewards/generated": -7.9221014976501465,
"rewards/margins": 7.004052639007568,
"rewards/real": -0.9180490374565125,
"step": 490
},
{
"epoch": 0.32,
"learning_rate": 4.960723637229232e-07,
"logits/generated": -2.5836546421051025,
"logits/real": -2.552192211151123,
"logps/generated": -456.425537109375,
"logps/real": -340.4407958984375,
"loss": 0.0722,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -8.413108825683594,
"rewards/margins": 7.556717872619629,
"rewards/real": -0.8563922047615051,
"step": 500
},
{
"epoch": 0.33,
"learning_rate": 4.948821709116876e-07,
"logits/generated": -2.632378101348877,
"logits/real": -2.586066246032715,
"logps/generated": -491.17681884765625,
"logps/real": -358.8891906738281,
"loss": 0.1264,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -9.601828575134277,
"rewards/margins": 8.468477249145508,
"rewards/real": -1.1333516836166382,
"step": 510
},
{
"epoch": 0.33,
"learning_rate": 4.936919781004522e-07,
"logits/generated": -2.6995837688446045,
"logits/real": -2.663362979888916,
"logps/generated": -473.89410400390625,
"logps/real": -377.1138610839844,
"loss": 0.1473,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -8.111103057861328,
"rewards/margins": 7.439300537109375,
"rewards/real": -0.6718028783798218,
"step": 520
},
{
"epoch": 0.34,
"learning_rate": 4.925017852892168e-07,
"logits/generated": -2.7533721923828125,
"logits/real": -2.7160682678222656,
"logps/generated": -463.5284729003906,
"logps/real": -364.9224548339844,
"loss": 0.1281,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -6.726442813873291,
"rewards/margins": 7.018113136291504,
"rewards/real": 0.29166945815086365,
"step": 530
},
{
"epoch": 0.35,
"learning_rate": 4.913115924779814e-07,
"logits/generated": -2.692500352859497,
"logits/real": -2.6972968578338623,
"logps/generated": -485.6321716308594,
"logps/real": -365.08599853515625,
"loss": 0.093,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -7.418447971343994,
"rewards/margins": 6.375821113586426,
"rewards/real": -1.0426270961761475,
"step": 540
},
{
"epoch": 0.35,
"learning_rate": 4.90121399666746e-07,
"logits/generated": -2.6083171367645264,
"logits/real": -2.5617499351501465,
"logps/generated": -464.82916259765625,
"logps/real": -343.98126220703125,
"loss": 0.0843,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -7.705323696136475,
"rewards/margins": 7.201271057128906,
"rewards/real": -0.504052996635437,
"step": 550
},
{
"epoch": 0.36,
"learning_rate": 4.889312068555106e-07,
"logits/generated": -2.628220558166504,
"logits/real": -2.526280403137207,
"logps/generated": -502.65032958984375,
"logps/real": -313.36639404296875,
"loss": 0.1052,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -9.855968475341797,
"rewards/margins": 8.44670295715332,
"rewards/real": -1.4092657566070557,
"step": 560
},
{
"epoch": 0.37,
"learning_rate": 4.877410140442752e-07,
"logits/generated": -2.6273674964904785,
"logits/real": -2.5923876762390137,
"logps/generated": -443.7357482910156,
"logps/real": -344.1656188964844,
"loss": 0.1226,
"rewards/accuracies": 0.9375,
"rewards/generated": -9.525670051574707,
"rewards/margins": 7.985457420349121,
"rewards/real": -1.5402114391326904,
"step": 570
},
{
"epoch": 0.37,
"learning_rate": 4.865508212330398e-07,
"logits/generated": -2.734830379486084,
"logits/real": -2.637960433959961,
"logps/generated": -488.81781005859375,
"logps/real": -388.02618408203125,
"loss": 0.1234,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -8.961756706237793,
"rewards/margins": 8.064661979675293,
"rewards/real": -0.8970959782600403,
"step": 580
},
{
"epoch": 0.38,
"learning_rate": 4.853606284218044e-07,
"logits/generated": -2.6835715770721436,
"logits/real": -2.6238436698913574,
"logps/generated": -435.009033203125,
"logps/real": -347.2498474121094,
"loss": 0.08,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -7.5509772300720215,
"rewards/margins": 7.190362453460693,
"rewards/real": -0.3606160879135132,
"step": 590
},
{
"epoch": 0.39,
"learning_rate": 4.841704356105689e-07,
"logits/generated": -2.667619466781616,
"logits/real": -2.594552516937256,
"logps/generated": -485.05596923828125,
"logps/real": -357.52520751953125,
"loss": 0.1011,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -8.562358856201172,
"rewards/margins": 7.882147789001465,
"rewards/real": -0.6802110075950623,
"step": 600
},
{
"epoch": 0.39,
"learning_rate": 4.829802427993334e-07,
"logits/generated": -2.7254223823547363,
"logits/real": -2.674651861190796,
"logps/generated": -486.1986389160156,
"logps/real": -366.41973876953125,
"loss": 0.1319,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -9.714421272277832,
"rewards/margins": 8.38983154296875,
"rewards/real": -1.3245890140533447,
"step": 610
},
{
"epoch": 0.4,
"learning_rate": 4.81790049988098e-07,
"logits/generated": -2.7993836402893066,
"logits/real": -2.6992013454437256,
"logps/generated": -477.964111328125,
"logps/real": -352.30255126953125,
"loss": 0.1332,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -9.314876556396484,
"rewards/margins": 8.130788803100586,
"rewards/real": -1.1840870380401611,
"step": 620
},
{
"epoch": 0.4,
"learning_rate": 4.805998571768626e-07,
"logits/generated": -2.7570552825927734,
"logits/real": -2.722883701324463,
"logps/generated": -466.1041564941406,
"logps/real": -327.46435546875,
"loss": 0.0768,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -10.392984390258789,
"rewards/margins": 8.88233757019043,
"rewards/real": -1.510647177696228,
"step": 630
},
{
"epoch": 0.41,
"learning_rate": 4.794096643656272e-07,
"logits/generated": -2.7757675647735596,
"logits/real": -2.696953535079956,
"logps/generated": -573.4818115234375,
"logps/real": -451.28277587890625,
"loss": 0.0841,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -10.725347518920898,
"rewards/margins": 9.061750411987305,
"rewards/real": -1.6635980606079102,
"step": 640
},
{
"epoch": 0.42,
"learning_rate": 4.782194715543918e-07,
"logits/generated": -2.754331111907959,
"logits/real": -2.675215482711792,
"logps/generated": -536.6560668945312,
"logps/real": -378.1103515625,
"loss": 0.1631,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -9.196868896484375,
"rewards/margins": 8.15418815612793,
"rewards/real": -1.0426809787750244,
"step": 650
},
{
"epoch": 0.42,
"learning_rate": 4.770292787431564e-07,
"logits/generated": -2.869988441467285,
"logits/real": -2.751678943634033,
"logps/generated": -494.60772705078125,
"logps/real": -374.7413024902344,
"loss": 0.1045,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -8.500930786132812,
"rewards/margins": 7.580558776855469,
"rewards/real": -0.920372486114502,
"step": 660
},
{
"epoch": 0.43,
"learning_rate": 4.7583908593192097e-07,
"logits/generated": -2.8370208740234375,
"logits/real": -2.6929588317871094,
"logps/generated": -486.40533447265625,
"logps/real": -372.7452087402344,
"loss": 0.0754,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -9.398336410522461,
"rewards/margins": 8.141416549682617,
"rewards/real": -1.2569185495376587,
"step": 670
},
{
"epoch": 0.44,
"learning_rate": 4.746488931206855e-07,
"logits/generated": -2.7330057621002197,
"logits/real": -2.7403512001037598,
"logps/generated": -512.094970703125,
"logps/real": -335.0165100097656,
"loss": 0.0968,
"rewards/accuracies": 1.0,
"rewards/generated": -10.172496795654297,
"rewards/margins": 9.116006851196289,
"rewards/real": -1.056490182876587,
"step": 680
},
{
"epoch": 0.44,
"learning_rate": 4.734587003094501e-07,
"logits/generated": -2.8084769248962402,
"logits/real": -2.7017087936401367,
"logps/generated": -514.4985961914062,
"logps/real": -370.76409912109375,
"loss": 0.075,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -9.885915756225586,
"rewards/margins": 8.93104076385498,
"rewards/real": -0.954875111579895,
"step": 690
},
{
"epoch": 0.45,
"learning_rate": 4.722685074982147e-07,
"logits/generated": -2.7886388301849365,
"logits/real": -2.7220773696899414,
"logps/generated": -485.0414123535156,
"logps/real": -330.4255065917969,
"loss": 0.1032,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -10.818674087524414,
"rewards/margins": 9.245843887329102,
"rewards/real": -1.5728291273117065,
"step": 700
},
{
"epoch": 0.46,
"learning_rate": 4.710783146869793e-07,
"logits/generated": -2.8421998023986816,
"logits/real": -2.718574047088623,
"logps/generated": -492.8663024902344,
"logps/real": -378.52239990234375,
"loss": 0.1112,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -10.291857719421387,
"rewards/margins": 9.014776229858398,
"rewards/real": -1.277081847190857,
"step": 710
},
{
"epoch": 0.46,
"learning_rate": 4.698881218757438e-07,
"logits/generated": -2.7403712272644043,
"logits/real": -2.693634271621704,
"logps/generated": -450.5480041503906,
"logps/real": -318.03704833984375,
"loss": 0.0636,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -11.008447647094727,
"rewards/margins": 8.986620903015137,
"rewards/real": -2.021826982498169,
"step": 720
},
{
"epoch": 0.47,
"learning_rate": 4.6869792906450845e-07,
"logits/generated": -2.738825559616089,
"logits/real": -2.6762800216674805,
"logps/generated": -516.190673828125,
"logps/real": -378.79364013671875,
"loss": 0.0617,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -11.853250503540039,
"rewards/margins": 9.828583717346191,
"rewards/real": -2.0246665477752686,
"step": 730
},
{
"epoch": 0.48,
"learning_rate": 4.67507736253273e-07,
"logits/generated": -2.772712230682373,
"logits/real": -2.7906768321990967,
"logps/generated": -503.3374938964844,
"logps/real": -324.5931396484375,
"loss": 0.075,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -11.763754844665527,
"rewards/margins": 10.069581985473633,
"rewards/real": -1.694173812866211,
"step": 740
},
{
"epoch": 0.48,
"learning_rate": 4.6631754344203763e-07,
"logits/generated": -2.74733829498291,
"logits/real": -2.7517824172973633,
"logps/generated": -493.4088439941406,
"logps/real": -340.09991455078125,
"loss": 0.0989,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -10.747503280639648,
"rewards/margins": 8.963977813720703,
"rewards/real": -1.7835248708724976,
"step": 750
},
{
"epoch": 0.49,
"learning_rate": 4.6512735063080217e-07,
"logits/generated": -2.7257447242736816,
"logits/real": -2.718722343444824,
"logps/generated": -498.18255615234375,
"logps/real": -334.10333251953125,
"loss": 0.1144,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -11.353604316711426,
"rewards/margins": 9.699551582336426,
"rewards/real": -1.654052495956421,
"step": 760
},
{
"epoch": 0.49,
"learning_rate": 4.6393715781956676e-07,
"logits/generated": -2.699676990509033,
"logits/real": -2.6231935024261475,
"logps/generated": -525.2720336914062,
"logps/real": -386.4446716308594,
"loss": 0.0671,
"rewards/accuracies": 1.0,
"rewards/generated": -10.2306547164917,
"rewards/margins": 9.587125778198242,
"rewards/real": -0.6435292959213257,
"step": 770
},
{
"epoch": 0.5,
"learning_rate": 4.6274696500833135e-07,
"logits/generated": -2.677091360092163,
"logits/real": -2.612697124481201,
"logps/generated": -524.1741943359375,
"logps/real": -354.40130615234375,
"loss": 0.1003,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -11.090825080871582,
"rewards/margins": 9.66313362121582,
"rewards/real": -1.4276920557022095,
"step": 780
},
{
"epoch": 0.51,
"learning_rate": 4.6155677219709594e-07,
"logits/generated": -2.6755471229553223,
"logits/real": -2.562119960784912,
"logps/generated": -477.6163024902344,
"logps/real": -376.70867919921875,
"loss": 0.0953,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -10.81368350982666,
"rewards/margins": 9.579755783081055,
"rewards/real": -1.2339270114898682,
"step": 790
},
{
"epoch": 0.51,
"learning_rate": 4.603665793858605e-07,
"logits/generated": -2.6505370140075684,
"logits/real": -2.5655438899993896,
"logps/generated": -453.52276611328125,
"logps/real": -346.87591552734375,
"loss": 0.1396,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.680634498596191,
"rewards/margins": 8.618191719055176,
"rewards/real": -2.062441825866699,
"step": 800
},
{
"epoch": 0.52,
"learning_rate": 4.5917638657462507e-07,
"logits/generated": -2.5472962856292725,
"logits/real": -2.4991610050201416,
"logps/generated": -514.5006103515625,
"logps/real": -402.99920654296875,
"loss": 0.1054,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -10.380758285522461,
"rewards/margins": 8.436528205871582,
"rewards/real": -1.9442304372787476,
"step": 810
},
{
"epoch": 0.53,
"learning_rate": 4.5798619376338966e-07,
"logits/generated": -2.5719590187072754,
"logits/real": -2.519853115081787,
"logps/generated": -456.893310546875,
"logps/real": -343.7867431640625,
"loss": 0.1149,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.286421775817871,
"rewards/margins": 8.027682304382324,
"rewards/real": -2.258739471435547,
"step": 820
},
{
"epoch": 0.53,
"learning_rate": 4.567960009521542e-07,
"logits/generated": -2.5605781078338623,
"logits/real": -2.501222610473633,
"logps/generated": -433.1905212402344,
"logps/real": -311.0120544433594,
"loss": 0.0988,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -10.496920585632324,
"rewards/margins": 8.129188537597656,
"rewards/real": -2.367732048034668,
"step": 830
},
{
"epoch": 0.54,
"learning_rate": 4.5560580814091884e-07,
"logits/generated": -2.565680503845215,
"logits/real": -2.4589760303497314,
"logps/generated": -498.48583984375,
"logps/real": -353.51470947265625,
"loss": 0.0877,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -12.815507888793945,
"rewards/margins": 10.009346961975098,
"rewards/real": -2.8061606884002686,
"step": 840
},
{
"epoch": 0.55,
"learning_rate": 4.5441561532968337e-07,
"logits/generated": -2.524672746658325,
"logits/real": -2.4803435802459717,
"logps/generated": -510.71533203125,
"logps/real": -395.97503662109375,
"loss": 0.1072,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.569852828979492,
"rewards/margins": 8.171304702758789,
"rewards/real": -2.398545742034912,
"step": 850
},
{
"epoch": 0.55,
"learning_rate": 4.5322542251844796e-07,
"logits/generated": -2.4687283039093018,
"logits/real": -2.4027516841888428,
"logps/generated": -550.343017578125,
"logps/real": -387.80523681640625,
"loss": 0.0655,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -13.694091796875,
"rewards/margins": 10.239914894104004,
"rewards/real": -3.454176425933838,
"step": 860
},
{
"epoch": 0.56,
"learning_rate": 4.5203522970721255e-07,
"logits/generated": -2.4605631828308105,
"logits/real": -2.317789077758789,
"logps/generated": -515.8772583007812,
"logps/real": -373.03094482421875,
"loss": 0.0921,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -13.49272346496582,
"rewards/margins": 9.03429126739502,
"rewards/real": -4.458432197570801,
"step": 870
},
{
"epoch": 0.57,
"learning_rate": 4.5084503689597714e-07,
"logits/generated": -2.5647082328796387,
"logits/real": -2.478529453277588,
"logps/generated": -497.2862854003906,
"logps/real": -409.93402099609375,
"loss": 0.1512,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -10.405123710632324,
"rewards/margins": 8.666096687316895,
"rewards/real": -1.7390273809432983,
"step": 880
},
{
"epoch": 0.57,
"learning_rate": 4.496548440847417e-07,
"logits/generated": -2.4828336238861084,
"logits/real": -2.413994550704956,
"logps/generated": -457.74127197265625,
"logps/real": -331.61566162109375,
"loss": 0.107,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.047932624816895,
"rewards/margins": 8.361185073852539,
"rewards/real": -1.6867475509643555,
"step": 890
},
{
"epoch": 0.58,
"learning_rate": 4.484646512735063e-07,
"logits/generated": -2.5370724201202393,
"logits/real": -2.4653468132019043,
"logps/generated": -478.68011474609375,
"logps/real": -370.4181213378906,
"loss": 0.1372,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -10.04428768157959,
"rewards/margins": 8.153284072875977,
"rewards/real": -1.8910045623779297,
"step": 900
},
{
"epoch": 0.58,
"learning_rate": 4.4727445846227086e-07,
"logits/generated": -2.500948667526245,
"logits/real": -2.420001745223999,
"logps/generated": -470.17803955078125,
"logps/real": -346.50494384765625,
"loss": 0.0979,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -9.731805801391602,
"rewards/margins": 8.218327522277832,
"rewards/real": -1.5134775638580322,
"step": 910
},
{
"epoch": 0.59,
"learning_rate": 4.4608426565103545e-07,
"logits/generated": -2.4795870780944824,
"logits/real": -2.42265248298645,
"logps/generated": -478.30303955078125,
"logps/real": -338.5771789550781,
"loss": 0.1485,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -9.962733268737793,
"rewards/margins": 8.512125968933105,
"rewards/real": -1.4506077766418457,
"step": 920
},
{
"epoch": 0.6,
"learning_rate": 4.4489407283980004e-07,
"logits/generated": -2.4491372108459473,
"logits/real": -2.4447901248931885,
"logps/generated": -483.40020751953125,
"logps/real": -362.98577880859375,
"loss": 0.1104,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -11.003097534179688,
"rewards/margins": 9.003512382507324,
"rewards/real": -1.9995838403701782,
"step": 930
},
{
"epoch": 0.6,
"learning_rate": 4.437038800285646e-07,
"logits/generated": -2.459510326385498,
"logits/real": -2.4044106006622314,
"logps/generated": -475.15625,
"logps/real": -334.83526611328125,
"loss": 0.1292,
"rewards/accuracies": 0.9375,
"rewards/generated": -9.873337745666504,
"rewards/margins": 8.043792724609375,
"rewards/real": -1.829545021057129,
"step": 940
},
{
"epoch": 0.61,
"learning_rate": 4.4251368721732916e-07,
"logits/generated": -2.455169200897217,
"logits/real": -2.3593177795410156,
"logps/generated": -560.1038208007812,
"logps/real": -385.29278564453125,
"loss": 0.1159,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -11.663183212280273,
"rewards/margins": 9.093454360961914,
"rewards/real": -2.569728136062622,
"step": 950
},
{
"epoch": 0.62,
"learning_rate": 4.413234944060938e-07,
"logits/generated": -2.4482192993164062,
"logits/real": -2.3591175079345703,
"logps/generated": -466.2801208496094,
"logps/real": -324.6377868652344,
"loss": 0.138,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.816349029541016,
"rewards/margins": 8.520627975463867,
"rewards/real": -2.2957208156585693,
"step": 960
},
{
"epoch": 0.62,
"learning_rate": 4.4013330159485834e-07,
"logits/generated": -2.435135841369629,
"logits/real": -2.4362385272979736,
"logps/generated": -432.5603942871094,
"logps/real": -325.52471923828125,
"loss": 0.1216,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.173177719116211,
"rewards/margins": 7.554961204528809,
"rewards/real": -2.6182148456573486,
"step": 970
},
{
"epoch": 0.63,
"learning_rate": 4.3894310878362293e-07,
"logits/generated": -2.3807005882263184,
"logits/real": -2.354038715362549,
"logps/generated": -488.814208984375,
"logps/real": -333.68096923828125,
"loss": 0.0683,
"rewards/accuracies": 1.0,
"rewards/generated": -11.279688835144043,
"rewards/margins": 8.787554740905762,
"rewards/real": -2.4921340942382812,
"step": 980
},
{
"epoch": 0.64,
"learning_rate": 4.377529159723875e-07,
"logits/generated": -2.4375739097595215,
"logits/real": -2.33906888961792,
"logps/generated": -506.0091247558594,
"logps/real": -340.0153503417969,
"loss": 0.0866,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -11.742327690124512,
"rewards/margins": 9.213804244995117,
"rewards/real": -2.5285239219665527,
"step": 990
},
{
"epoch": 0.64,
"learning_rate": 4.365627231611521e-07,
"logits/generated": -2.373945951461792,
"logits/real": -2.355705738067627,
"logps/generated": -559.3948364257812,
"logps/real": -378.2243957519531,
"loss": 0.1036,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -13.373077392578125,
"rewards/margins": 11.117111206054688,
"rewards/real": -2.255967617034912,
"step": 1000
},
{
"epoch": 0.65,
"learning_rate": 4.3537253034991665e-07,
"logits/generated": -2.3554375171661377,
"logits/real": -2.2880444526672363,
"logps/generated": -538.6488037109375,
"logps/real": -424.7191467285156,
"loss": 0.1104,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -12.977459907531738,
"rewards/margins": 9.986202239990234,
"rewards/real": -2.9912569522857666,
"step": 1010
},
{
"epoch": 0.66,
"learning_rate": 4.3418233753868124e-07,
"logits/generated": -2.3351311683654785,
"logits/real": -2.3256137371063232,
"logps/generated": -551.7449340820312,
"logps/real": -456.4244079589844,
"loss": 0.0951,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -11.744857788085938,
"rewards/margins": 10.64583969116211,
"rewards/real": -1.0990195274353027,
"step": 1020
},
{
"epoch": 0.66,
"learning_rate": 4.3299214472744583e-07,
"logits/generated": -2.449162006378174,
"logits/real": -2.3387961387634277,
"logps/generated": -493.4405822753906,
"logps/real": -366.27191162109375,
"loss": 0.0719,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -11.17166519165039,
"rewards/margins": 9.858702659606934,
"rewards/real": -1.3129618167877197,
"step": 1030
},
{
"epoch": 0.67,
"learning_rate": 4.3180195191621036e-07,
"logits/generated": -2.372615337371826,
"logits/real": -2.357564687728882,
"logps/generated": -506.31890869140625,
"logps/real": -382.11859130859375,
"loss": 0.1139,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -10.259264945983887,
"rewards/margins": 8.731651306152344,
"rewards/real": -1.5276130437850952,
"step": 1040
},
{
"epoch": 0.67,
"learning_rate": 4.30611759104975e-07,
"logits/generated": -2.4573209285736084,
"logits/real": -2.3666205406188965,
"logps/generated": -515.4904174804688,
"logps/real": -379.75238037109375,
"loss": 0.0756,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -12.583343505859375,
"rewards/margins": 10.160721778869629,
"rewards/real": -2.422621011734009,
"step": 1050
},
{
"epoch": 0.68,
"learning_rate": 4.2942156629373954e-07,
"logits/generated": -2.316649913787842,
"logits/real": -2.2139687538146973,
"logps/generated": -532.4139404296875,
"logps/real": -382.7889404296875,
"loss": 0.0792,
"rewards/accuracies": 0.9375,
"rewards/generated": -13.652563095092773,
"rewards/margins": 9.413002967834473,
"rewards/real": -4.239560604095459,
"step": 1060
},
{
"epoch": 0.69,
"learning_rate": 4.2823137348250413e-07,
"logits/generated": -2.2381844520568848,
"logits/real": -2.1988272666931152,
"logps/generated": -531.000244140625,
"logps/real": -389.1220397949219,
"loss": 0.0604,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -13.427118301391602,
"rewards/margins": 10.272978782653809,
"rewards/real": -3.1541380882263184,
"step": 1070
},
{
"epoch": 0.69,
"learning_rate": 4.270411806712687e-07,
"logits/generated": -2.3720908164978027,
"logits/real": -2.348632335662842,
"logps/generated": -521.8538818359375,
"logps/real": -375.3919677734375,
"loss": 0.0944,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -13.00294017791748,
"rewards/margins": 10.072427749633789,
"rewards/real": -2.930511474609375,
"step": 1080
},
{
"epoch": 0.7,
"learning_rate": 4.258509878600333e-07,
"logits/generated": -2.3700733184814453,
"logits/real": -2.3018269538879395,
"logps/generated": -437.77056884765625,
"logps/real": -336.5913391113281,
"loss": 0.1074,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -10.549997329711914,
"rewards/margins": 8.99598217010498,
"rewards/real": -1.5540151596069336,
"step": 1090
},
{
"epoch": 0.71,
"learning_rate": 4.2466079504879785e-07,
"logits/generated": -2.4425048828125,
"logits/real": -2.3580105304718018,
"logps/generated": -480.294677734375,
"logps/real": -348.6474914550781,
"loss": 0.1287,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -10.765533447265625,
"rewards/margins": 8.889466285705566,
"rewards/real": -1.876068115234375,
"step": 1100
},
{
"epoch": 0.71,
"learning_rate": 4.234706022375625e-07,
"logits/generated": -2.4600212574005127,
"logits/real": -2.46691632270813,
"logps/generated": -464.4510803222656,
"logps/real": -327.79296875,
"loss": 0.1345,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -10.468949317932129,
"rewards/margins": 8.587320327758789,
"rewards/real": -1.8816286325454712,
"step": 1110
},
{
"epoch": 0.72,
"learning_rate": 4.2228040942632703e-07,
"logits/generated": -2.4465115070343018,
"logits/real": -2.4585764408111572,
"logps/generated": -460.46795654296875,
"logps/real": -305.36920166015625,
"loss": 0.1342,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -9.80966854095459,
"rewards/margins": 7.955733299255371,
"rewards/real": -1.8539355993270874,
"step": 1120
},
{
"epoch": 0.73,
"learning_rate": 4.210902166150916e-07,
"logits/generated": -2.297677516937256,
"logits/real": -2.2483174800872803,
"logps/generated": -491.7379455566406,
"logps/real": -358.0972900390625,
"loss": 0.1178,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -11.660999298095703,
"rewards/margins": 9.423800468444824,
"rewards/real": -2.237199306488037,
"step": 1130
},
{
"epoch": 0.73,
"learning_rate": 4.199000238038562e-07,
"logits/generated": -2.337754249572754,
"logits/real": -2.2701668739318848,
"logps/generated": -540.6451416015625,
"logps/real": -358.3714294433594,
"loss": 0.0784,
"rewards/accuracies": 0.9375,
"rewards/generated": -12.581125259399414,
"rewards/margins": 10.22091293334961,
"rewards/real": -2.3602118492126465,
"step": 1140
},
{
"epoch": 0.74,
"learning_rate": 4.187098309926208e-07,
"logits/generated": -2.328687906265259,
"logits/real": -2.2642407417297363,
"logps/generated": -510.14031982421875,
"logps/real": -395.0322570800781,
"loss": 0.09,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -11.54482650756836,
"rewards/margins": 9.52647590637207,
"rewards/real": -2.0183498859405518,
"step": 1150
},
{
"epoch": 0.75,
"learning_rate": 4.1751963818138534e-07,
"logits/generated": -2.2651526927948,
"logits/real": -2.2788243293762207,
"logps/generated": -504.93603515625,
"logps/real": -315.02191162109375,
"loss": 0.1041,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -13.738065719604492,
"rewards/margins": 10.608281135559082,
"rewards/real": -3.129784345626831,
"step": 1160
},
{
"epoch": 0.75,
"learning_rate": 4.1632944537015e-07,
"logits/generated": -2.2317709922790527,
"logits/real": -2.24200177192688,
"logps/generated": -509.27069091796875,
"logps/real": -339.88775634765625,
"loss": 0.1091,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -13.493547439575195,
"rewards/margins": 10.474954605102539,
"rewards/real": -3.0185914039611816,
"step": 1170
},
{
"epoch": 0.76,
"learning_rate": 4.151392525589145e-07,
"logits/generated": -2.1863222122192383,
"logits/real": -2.1978249549865723,
"logps/generated": -579.1888427734375,
"logps/real": -406.6951904296875,
"loss": 0.0913,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -14.355679512023926,
"rewards/margins": 11.128015518188477,
"rewards/real": -3.227665424346924,
"step": 1180
},
{
"epoch": 0.76,
"learning_rate": 4.139490597476791e-07,
"logits/generated": -2.2855031490325928,
"logits/real": -2.2533984184265137,
"logps/generated": -515.9293823242188,
"logps/real": -389.2561340332031,
"loss": 0.098,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -12.194608688354492,
"rewards/margins": 9.927949905395508,
"rewards/real": -2.2666568756103516,
"step": 1190
},
{
"epoch": 0.77,
"learning_rate": 4.127588669364437e-07,
"logits/generated": -2.2684109210968018,
"logits/real": -2.2653377056121826,
"logps/generated": -492.51708984375,
"logps/real": -367.3876953125,
"loss": 0.0969,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -12.415742874145508,
"rewards/margins": 8.819601058959961,
"rewards/real": -3.5961413383483887,
"step": 1200
},
{
"epoch": 0.78,
"learning_rate": 4.115686741252083e-07,
"logits/generated": -2.3267722129821777,
"logits/real": -2.3436107635498047,
"logps/generated": -532.1981201171875,
"logps/real": -420.6949157714844,
"loss": 0.0925,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -11.921398162841797,
"rewards/margins": 9.262883186340332,
"rewards/real": -2.6585140228271484,
"step": 1210
},
{
"epoch": 0.78,
"learning_rate": 4.103784813139728e-07,
"logits/generated": -2.3338916301727295,
"logits/real": -2.334585666656494,
"logps/generated": -532.9281616210938,
"logps/real": -377.1614990234375,
"loss": 0.1003,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -12.63199234008789,
"rewards/margins": 9.967310905456543,
"rewards/real": -2.6646811962127686,
"step": 1220
},
{
"epoch": 0.79,
"learning_rate": 4.091882885027374e-07,
"logits/generated": -2.4137134552001953,
"logits/real": -2.4191126823425293,
"logps/generated": -543.4132080078125,
"logps/real": -409.27313232421875,
"loss": 0.0698,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -11.591083526611328,
"rewards/margins": 9.39649772644043,
"rewards/real": -2.1945860385894775,
"step": 1230
},
{
"epoch": 0.8,
"learning_rate": 4.07998095691502e-07,
"logits/generated": -2.4814438819885254,
"logits/real": -2.460033893585205,
"logps/generated": -558.88427734375,
"logps/real": -361.6279296875,
"loss": 0.0532,
"rewards/accuracies": 1.0,
"rewards/generated": -12.638830184936523,
"rewards/margins": 10.296621322631836,
"rewards/real": -2.342207670211792,
"step": 1240
},
{
"epoch": 0.8,
"learning_rate": 4.0680790288026654e-07,
"logits/generated": -2.347885847091675,
"logits/real": -2.360043525695801,
"logps/generated": -555.4683837890625,
"logps/real": -371.6921081542969,
"loss": 0.0769,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -12.670600891113281,
"rewards/margins": 10.81347370147705,
"rewards/real": -1.857126235961914,
"step": 1250
},
{
"epoch": 0.81,
"learning_rate": 4.056177100690312e-07,
"logits/generated": -2.285675048828125,
"logits/real": -2.264207363128662,
"logps/generated": -540.85546875,
"logps/real": -306.2080078125,
"loss": 0.0912,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -13.267425537109375,
"rewards/margins": 10.254603385925293,
"rewards/real": -3.0128207206726074,
"step": 1260
},
{
"epoch": 0.82,
"learning_rate": 4.044275172577957e-07,
"logits/generated": -2.256348133087158,
"logits/real": -2.257491111755371,
"logps/generated": -484.46746826171875,
"logps/real": -345.3021240234375,
"loss": 0.0676,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -13.33599853515625,
"rewards/margins": 10.659614562988281,
"rewards/real": -2.6763834953308105,
"step": 1270
},
{
"epoch": 0.82,
"learning_rate": 4.0323732444656036e-07,
"logits/generated": -2.310236930847168,
"logits/real": -2.3040318489074707,
"logps/generated": -652.2039794921875,
"logps/real": -504.1841735839844,
"loss": 0.0643,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -14.383448600769043,
"rewards/margins": 11.50027084350586,
"rewards/real": -2.8831779956817627,
"step": 1280
},
{
"epoch": 0.83,
"learning_rate": 4.020471316353249e-07,
"logits/generated": -2.356076717376709,
"logits/real": -2.3591604232788086,
"logps/generated": -550.447265625,
"logps/real": -353.8184509277344,
"loss": 0.0918,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -14.289446830749512,
"rewards/margins": 11.606106758117676,
"rewards/real": -2.6833410263061523,
"step": 1290
},
{
"epoch": 0.84,
"learning_rate": 4.008569388240895e-07,
"logits/generated": -2.360548257827759,
"logits/real": -2.312885284423828,
"logps/generated": -535.8861083984375,
"logps/real": -367.6419372558594,
"loss": 0.1156,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -13.502528190612793,
"rewards/margins": 10.530913352966309,
"rewards/real": -2.971615791320801,
"step": 1300
},
{
"epoch": 0.84,
"learning_rate": 3.996667460128541e-07,
"logits/generated": -2.3651747703552246,
"logits/real": -2.385131359100342,
"logps/generated": -576.5161743164062,
"logps/real": -374.0749206542969,
"loss": 0.0621,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -14.620333671569824,
"rewards/margins": 11.384607315063477,
"rewards/real": -3.2357261180877686,
"step": 1310
},
{
"epoch": 0.85,
"learning_rate": 3.9847655320161867e-07,
"logits/generated": -2.3751935958862305,
"logits/real": -2.4091992378234863,
"logps/generated": -528.142333984375,
"logps/real": -353.9475402832031,
"loss": 0.1308,
"rewards/accuracies": 0.9375,
"rewards/generated": -13.35066032409668,
"rewards/margins": 11.197701454162598,
"rewards/real": -2.1529600620269775,
"step": 1320
},
{
"epoch": 0.85,
"learning_rate": 3.972863603903832e-07,
"logits/generated": -2.3546595573425293,
"logits/real": -2.3350775241851807,
"logps/generated": -561.2341918945312,
"logps/real": -414.78094482421875,
"loss": 0.0393,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -13.160077095031738,
"rewards/margins": 10.612679481506348,
"rewards/real": -2.5473971366882324,
"step": 1330
},
{
"epoch": 0.86,
"learning_rate": 3.9609616757914784e-07,
"logits/generated": -2.3799915313720703,
"logits/real": -2.4229447841644287,
"logps/generated": -529.3641357421875,
"logps/real": -367.49676513671875,
"loss": 0.0899,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -13.449444770812988,
"rewards/margins": 10.857105255126953,
"rewards/real": -2.5923383235931396,
"step": 1340
},
{
"epoch": 0.87,
"learning_rate": 3.949059747679124e-07,
"logits/generated": -2.3573193550109863,
"logits/real": -2.361992359161377,
"logps/generated": -541.9324340820312,
"logps/real": -408.26885986328125,
"loss": 0.0703,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -13.906323432922363,
"rewards/margins": 10.026899337768555,
"rewards/real": -3.8794217109680176,
"step": 1350
},
{
"epoch": 0.87,
"learning_rate": 3.9371578195667697e-07,
"logits/generated": -2.3486838340759277,
"logits/real": -2.3597350120544434,
"logps/generated": -507.2032775878906,
"logps/real": -343.7771301269531,
"loss": 0.0878,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -14.98302936553955,
"rewards/margins": 11.527512550354004,
"rewards/real": -3.4555180072784424,
"step": 1360
},
{
"epoch": 0.88,
"learning_rate": 3.9252558914544156e-07,
"logits/generated": -2.473050594329834,
"logits/real": -2.515122652053833,
"logps/generated": -507.3802185058594,
"logps/real": -396.953125,
"loss": 0.0873,
"rewards/accuracies": 0.9375,
"rewards/generated": -13.604555130004883,
"rewards/margins": 11.562819480895996,
"rewards/real": -2.041734218597412,
"step": 1370
},
{
"epoch": 0.89,
"learning_rate": 3.9133539633420615e-07,
"logits/generated": -2.3934197425842285,
"logits/real": -2.3536086082458496,
"logps/generated": -532.2623901367188,
"logps/real": -370.50146484375,
"loss": 0.1024,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -12.107502937316895,
"rewards/margins": 9.64056396484375,
"rewards/real": -2.4669392108917236,
"step": 1380
},
{
"epoch": 0.89,
"learning_rate": 3.901452035229707e-07,
"logits/generated": -2.3607726097106934,
"logits/real": -2.3698906898498535,
"logps/generated": -516.7948608398438,
"logps/real": -358.2063293457031,
"loss": 0.0738,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -12.62265396118164,
"rewards/margins": 11.326190948486328,
"rewards/real": -1.2964636087417603,
"step": 1390
},
{
"epoch": 0.9,
"learning_rate": 3.8895501071173533e-07,
"logits/generated": -2.3465137481689453,
"logits/real": -2.3854851722717285,
"logps/generated": -521.5613403320312,
"logps/real": -320.7744140625,
"loss": 0.0709,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -12.96727180480957,
"rewards/margins": 10.701366424560547,
"rewards/real": -2.2659034729003906,
"step": 1400
},
{
"epoch": 0.91,
"learning_rate": 3.8776481790049987e-07,
"logits/generated": -2.39072847366333,
"logits/real": -2.3972249031066895,
"logps/generated": -533.3491821289062,
"logps/real": -341.78558349609375,
"loss": 0.0688,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -14.228726387023926,
"rewards/margins": 12.107359886169434,
"rewards/real": -2.1213667392730713,
"step": 1410
},
{
"epoch": 0.91,
"learning_rate": 3.865746250892644e-07,
"logits/generated": -2.2480862140655518,
"logits/real": -2.340782642364502,
"logps/generated": -566.9257202148438,
"logps/real": -318.01495361328125,
"loss": 0.0588,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -15.223588943481445,
"rewards/margins": 12.4530029296875,
"rewards/real": -2.77058482170105,
"step": 1420
},
{
"epoch": 0.92,
"learning_rate": 3.8538443227802905e-07,
"logits/generated": -2.261993169784546,
"logits/real": -2.2912187576293945,
"logps/generated": -497.12091064453125,
"logps/real": -354.21795654296875,
"loss": 0.1078,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -13.343725204467773,
"rewards/margins": 10.44404411315918,
"rewards/real": -2.8996803760528564,
"step": 1430
},
{
"epoch": 0.93,
"learning_rate": 3.841942394667936e-07,
"logits/generated": -2.333307981491089,
"logits/real": -2.3561549186706543,
"logps/generated": -516.3057250976562,
"logps/real": -322.0869140625,
"loss": 0.0865,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -13.671916007995605,
"rewards/margins": 10.728243827819824,
"rewards/real": -2.943671226501465,
"step": 1440
},
{
"epoch": 0.93,
"learning_rate": 3.8300404665555817e-07,
"logits/generated": -2.332399845123291,
"logits/real": -2.285750389099121,
"logps/generated": -566.4459228515625,
"logps/real": -421.2608337402344,
"loss": 0.068,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -13.901707649230957,
"rewards/margins": 11.070039749145508,
"rewards/real": -2.8316686153411865,
"step": 1450
},
{
"epoch": 0.94,
"learning_rate": 3.8181385384432276e-07,
"logits/generated": -2.232886791229248,
"logits/real": -2.280543804168701,
"logps/generated": -488.8804626464844,
"logps/real": -350.1278991699219,
"loss": 0.0911,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -12.198708534240723,
"rewards/margins": 9.276993751525879,
"rewards/real": -2.92171573638916,
"step": 1460
},
{
"epoch": 0.94,
"learning_rate": 3.8062366103308735e-07,
"logits/generated": -2.254783868789673,
"logits/real": -2.3048110008239746,
"logps/generated": -550.8900146484375,
"logps/real": -392.82562255859375,
"loss": 0.1241,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -13.891596794128418,
"rewards/margins": 11.135417938232422,
"rewards/real": -2.7561793327331543,
"step": 1470
},
{
"epoch": 0.95,
"learning_rate": 3.794334682218519e-07,
"logits/generated": -2.182992458343506,
"logits/real": -2.2217135429382324,
"logps/generated": -545.983154296875,
"logps/real": -389.5903015136719,
"loss": 0.0698,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -14.36902141571045,
"rewards/margins": 11.656338691711426,
"rewards/real": -2.712681770324707,
"step": 1480
},
{
"epoch": 0.96,
"learning_rate": 3.7824327541061653e-07,
"logits/generated": -2.213576555252075,
"logits/real": -2.1897943019866943,
"logps/generated": -545.77490234375,
"logps/real": -393.1239013671875,
"loss": 0.1254,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -13.385502815246582,
"rewards/margins": 10.636938095092773,
"rewards/real": -2.748565673828125,
"step": 1490
},
{
"epoch": 0.96,
"learning_rate": 3.7705308259938107e-07,
"logits/generated": -2.2783615589141846,
"logits/real": -2.3009047508239746,
"logps/generated": -476.43389892578125,
"logps/real": -353.0292053222656,
"loss": 0.0697,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -12.137995719909668,
"rewards/margins": 9.004945755004883,
"rewards/real": -3.1330504417419434,
"step": 1500
},
{
"epoch": 0.97,
"learning_rate": 3.7586288978814566e-07,
"logits/generated": -2.351835250854492,
"logits/real": -2.3665931224823,
"logps/generated": -560.801025390625,
"logps/real": -415.95599365234375,
"loss": 0.0882,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -13.19482707977295,
"rewards/margins": 11.202047348022461,
"rewards/real": -1.992780327796936,
"step": 1510
},
{
"epoch": 0.98,
"learning_rate": 3.7467269697691025e-07,
"logits/generated": -2.382901430130005,
"logits/real": -2.393000841140747,
"logps/generated": -505.42071533203125,
"logps/real": -403.2711181640625,
"loss": 0.1249,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -11.522603988647461,
"rewards/margins": 9.553156852722168,
"rewards/real": -1.9694464206695557,
"step": 1520
},
{
"epoch": 0.98,
"learning_rate": 3.7348250416567484e-07,
"logits/generated": -2.3661327362060547,
"logits/real": -2.366842746734619,
"logps/generated": -558.5916748046875,
"logps/real": -373.6806640625,
"loss": 0.08,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -11.855193138122559,
"rewards/margins": 10.633702278137207,
"rewards/real": -1.2214914560317993,
"step": 1530
},
{
"epoch": 0.99,
"learning_rate": 3.722923113544394e-07,
"logits/generated": -2.339836597442627,
"logits/real": -2.34541654586792,
"logps/generated": -536.8855590820312,
"logps/real": -369.8499450683594,
"loss": 0.0723,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -11.604657173156738,
"rewards/margins": 10.141790390014648,
"rewards/real": -1.462865948677063,
"step": 1540
},
{
"epoch": 1.0,
"learning_rate": 3.71102118543204e-07,
"logits/generated": -2.3748319149017334,
"logits/real": -2.347130298614502,
"logps/generated": -509.50506591796875,
"logps/real": -383.81207275390625,
"loss": 0.1114,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -10.870028495788574,
"rewards/margins": 9.974245071411133,
"rewards/real": -0.8957852125167847,
"step": 1550
},
{
"epoch": 1.0,
"learning_rate": 3.6991192573196855e-07,
"logits/generated": -2.3040242195129395,
"logits/real": -2.3185038566589355,
"logps/generated": -511.0401916503906,
"logps/real": -363.1226501464844,
"loss": 0.0345,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -13.427467346191406,
"rewards/margins": 11.945287704467773,
"rewards/real": -1.4821794033050537,
"step": 1560
},
{
"epoch": 1.01,
"learning_rate": 3.6872173292073314e-07,
"logits/generated": -2.3647196292877197,
"logits/real": -2.3546738624572754,
"logps/generated": -562.1310424804688,
"logps/real": -375.43316650390625,
"loss": 0.0082,
"rewards/accuracies": 1.0,
"rewards/generated": -14.149296760559082,
"rewards/margins": 12.207903861999512,
"rewards/real": -1.9413917064666748,
"step": 1570
},
{
"epoch": 1.02,
"learning_rate": 3.6753154010949773e-07,
"logits/generated": -2.301741123199463,
"logits/real": -2.3652164936065674,
"logps/generated": -549.0155639648438,
"logps/real": -403.6900329589844,
"loss": 0.0077,
"rewards/accuracies": 1.0,
"rewards/generated": -15.563409805297852,
"rewards/margins": 14.276460647583008,
"rewards/real": -1.2869514226913452,
"step": 1580
},
{
"epoch": 1.02,
"learning_rate": 3.663413472982623e-07,
"logits/generated": -2.261230707168579,
"logits/real": -2.3311500549316406,
"logps/generated": -563.3414916992188,
"logps/real": -348.5225524902344,
"loss": 0.0126,
"rewards/accuracies": 1.0,
"rewards/generated": -15.899667739868164,
"rewards/margins": 13.73414421081543,
"rewards/real": -2.1655211448669434,
"step": 1590
},
{
"epoch": 1.03,
"learning_rate": 3.6515115448702686e-07,
"logits/generated": -2.372976779937744,
"logits/real": -2.400886058807373,
"logps/generated": -569.90673828125,
"logps/real": -390.90704345703125,
"loss": 0.0055,
"rewards/accuracies": 1.0,
"rewards/generated": -14.986291885375977,
"rewards/margins": 13.204297065734863,
"rewards/real": -1.7819948196411133,
"step": 1600
},
{
"epoch": 1.03,
"learning_rate": 3.639609616757915e-07,
"logits/generated": -2.2624919414520264,
"logits/real": -2.3082499504089355,
"logps/generated": -564.4075317382812,
"logps/real": -394.94287109375,
"loss": 0.017,
"rewards/accuracies": 1.0,
"rewards/generated": -15.597874641418457,
"rewards/margins": 13.840059280395508,
"rewards/real": -1.7578150033950806,
"step": 1610
},
{
"epoch": 1.04,
"learning_rate": 3.6277076886455604e-07,
"logits/generated": -2.1787052154541016,
"logits/real": -2.215146780014038,
"logps/generated": -520.8050537109375,
"logps/real": -337.3519287109375,
"loss": 0.0162,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -15.893610954284668,
"rewards/margins": 13.641830444335938,
"rewards/real": -2.2517800331115723,
"step": 1620
},
{
"epoch": 1.05,
"learning_rate": 3.615805760533206e-07,
"logits/generated": -2.227945566177368,
"logits/real": -2.295309543609619,
"logps/generated": -569.5865478515625,
"logps/real": -361.73980712890625,
"loss": 0.0228,
"rewards/accuracies": 1.0,
"rewards/generated": -16.241802215576172,
"rewards/margins": 14.572728157043457,
"rewards/real": -1.669075608253479,
"step": 1630
},
{
"epoch": 1.05,
"learning_rate": 3.603903832420852e-07,
"logits/generated": -2.281085968017578,
"logits/real": -2.32185697555542,
"logps/generated": -550.8215942382812,
"logps/real": -361.8749694824219,
"loss": 0.0144,
"rewards/accuracies": 1.0,
"rewards/generated": -15.377789497375488,
"rewards/margins": 12.968729972839355,
"rewards/real": -2.4090590476989746,
"step": 1640
},
{
"epoch": 1.06,
"learning_rate": 3.5920019043084976e-07,
"logits/generated": -2.36209774017334,
"logits/real": -2.285122871398926,
"logps/generated": -528.7139892578125,
"logps/real": -321.8282775878906,
"loss": 0.0069,
"rewards/accuracies": 1.0,
"rewards/generated": -15.81471061706543,
"rewards/margins": 13.131657600402832,
"rewards/real": -2.683054208755493,
"step": 1650
},
{
"epoch": 1.07,
"learning_rate": 3.5800999761961435e-07,
"logits/generated": -2.2304556369781494,
"logits/real": -2.2633605003356934,
"logps/generated": -569.7330932617188,
"logps/real": -362.989501953125,
"loss": 0.0132,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -16.337574005126953,
"rewards/margins": 14.290669441223145,
"rewards/real": -2.046905994415283,
"step": 1660
},
{
"epoch": 1.07,
"learning_rate": 3.5681980480837893e-07,
"logits/generated": -2.1861634254455566,
"logits/real": -2.213642120361328,
"logps/generated": -559.1907348632812,
"logps/real": -386.95330810546875,
"loss": 0.0065,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -15.114718437194824,
"rewards/margins": 12.973544120788574,
"rewards/real": -2.1411757469177246,
"step": 1670
},
{
"epoch": 1.08,
"learning_rate": 3.556296119971435e-07,
"logits/generated": -2.160221815109253,
"logits/real": -2.219512701034546,
"logps/generated": -548.2822265625,
"logps/real": -373.2680358886719,
"loss": 0.0046,
"rewards/accuracies": 1.0,
"rewards/generated": -17.96226692199707,
"rewards/margins": 15.323366165161133,
"rewards/real": -2.6389002799987793,
"step": 1680
},
{
"epoch": 1.09,
"learning_rate": 3.5443941918590806e-07,
"logits/generated": -2.237609386444092,
"logits/real": -2.2286012172698975,
"logps/generated": -572.4905395507812,
"logps/real": -334.9125061035156,
"loss": 0.0044,
"rewards/accuracies": 1.0,
"rewards/generated": -18.150920867919922,
"rewards/margins": 14.536825180053711,
"rewards/real": -3.614098072052002,
"step": 1690
},
{
"epoch": 1.09,
"learning_rate": 3.532492263746727e-07,
"logits/generated": -2.184624195098877,
"logits/real": -2.211127281188965,
"logps/generated": -608.9227905273438,
"logps/real": -361.32684326171875,
"loss": 0.0088,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.036636352539062,
"rewards/margins": 16.421756744384766,
"rewards/real": -2.6148791313171387,
"step": 1700
},
{
"epoch": 1.1,
"learning_rate": 3.5205903356343724e-07,
"logits/generated": -2.1313374042510986,
"logits/real": -2.173050880432129,
"logps/generated": -598.5973510742188,
"logps/real": -355.4421081542969,
"loss": 0.0157,
"rewards/accuracies": 1.0,
"rewards/generated": -20.13778305053711,
"rewards/margins": 16.43929672241211,
"rewards/real": -3.6984870433807373,
"step": 1710
},
{
"epoch": 1.11,
"learning_rate": 3.5086884075220183e-07,
"logits/generated": -2.087921619415283,
"logits/real": -2.1425328254699707,
"logps/generated": -566.9527587890625,
"logps/real": -332.5347595214844,
"loss": 0.0028,
"rewards/accuracies": 1.0,
"rewards/generated": -19.658199310302734,
"rewards/margins": 15.989227294921875,
"rewards/real": -3.6689727306365967,
"step": 1720
},
{
"epoch": 1.11,
"learning_rate": 3.496786479409664e-07,
"logits/generated": -2.07328200340271,
"logits/real": -2.1263468265533447,
"logps/generated": -567.5440063476562,
"logps/real": -416.94256591796875,
"loss": 0.0033,
"rewards/accuracies": 1.0,
"rewards/generated": -19.27109146118164,
"rewards/margins": 15.73029899597168,
"rewards/real": -3.540794849395752,
"step": 1730
},
{
"epoch": 1.12,
"learning_rate": 3.48488455129731e-07,
"logits/generated": -2.0371181964874268,
"logits/real": -2.082137107849121,
"logps/generated": -556.2427978515625,
"logps/real": -303.4817810058594,
"loss": 0.0128,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.692432403564453,
"rewards/margins": 16.322385787963867,
"rewards/real": -3.3700478076934814,
"step": 1740
},
{
"epoch": 1.12,
"learning_rate": 3.4729826231849555e-07,
"logits/generated": -2.097503423690796,
"logits/real": -2.1778554916381836,
"logps/generated": -610.5614013671875,
"logps/real": -399.98358154296875,
"loss": 0.0139,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.528287887573242,
"rewards/margins": 15.74824047088623,
"rewards/real": -3.780047655105591,
"step": 1750
},
{
"epoch": 1.13,
"learning_rate": 3.461080695072602e-07,
"logits/generated": -2.0831446647644043,
"logits/real": -2.2034473419189453,
"logps/generated": -588.8594970703125,
"logps/real": -398.7830810546875,
"loss": 0.0076,
"rewards/accuracies": 1.0,
"rewards/generated": -17.589412689208984,
"rewards/margins": 16.229389190673828,
"rewards/real": -1.360022783279419,
"step": 1760
},
{
"epoch": 1.14,
"learning_rate": 3.449178766960247e-07,
"logits/generated": -2.1205058097839355,
"logits/real": -2.1790976524353027,
"logps/generated": -588.9290771484375,
"logps/real": -380.9736633300781,
"loss": 0.0096,
"rewards/accuracies": 1.0,
"rewards/generated": -17.767770767211914,
"rewards/margins": 14.602663040161133,
"rewards/real": -3.1651082038879395,
"step": 1770
},
{
"epoch": 1.14,
"learning_rate": 3.4372768388478937e-07,
"logits/generated": -2.1685116291046143,
"logits/real": -2.245673656463623,
"logps/generated": -547.3893432617188,
"logps/real": -357.3170471191406,
"loss": 0.009,
"rewards/accuracies": 1.0,
"rewards/generated": -16.38484001159668,
"rewards/margins": 14.403864860534668,
"rewards/real": -1.9809764623641968,
"step": 1780
},
{
"epoch": 1.15,
"learning_rate": 3.425374910735539e-07,
"logits/generated": -2.249411106109619,
"logits/real": -2.279759645462036,
"logps/generated": -618.198486328125,
"logps/real": -434.0210876464844,
"loss": 0.0164,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -15.497578620910645,
"rewards/margins": 12.881543159484863,
"rewards/real": -2.616034984588623,
"step": 1790
},
{
"epoch": 1.16,
"learning_rate": 3.413472982623185e-07,
"logits/generated": -2.3159961700439453,
"logits/real": -2.294581651687622,
"logps/generated": -537.07861328125,
"logps/real": -337.27874755859375,
"loss": 0.0096,
"rewards/accuracies": 1.0,
"rewards/generated": -17.291156768798828,
"rewards/margins": 15.182905197143555,
"rewards/real": -2.1082491874694824,
"step": 1800
},
{
"epoch": 1.16,
"learning_rate": 3.401571054510831e-07,
"logits/generated": -2.2418415546417236,
"logits/real": -2.266832113265991,
"logps/generated": -598.0546264648438,
"logps/real": -402.66510009765625,
"loss": 0.0129,
"rewards/accuracies": 1.0,
"rewards/generated": -16.92933464050293,
"rewards/margins": 14.629178047180176,
"rewards/real": -2.300158739089966,
"step": 1810
},
{
"epoch": 1.17,
"learning_rate": 3.389669126398476e-07,
"logits/generated": -2.200573205947876,
"logits/real": -2.224865436553955,
"logps/generated": -546.8048706054688,
"logps/real": -372.92010498046875,
"loss": 0.0072,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.779863357543945,
"rewards/margins": 15.325950622558594,
"rewards/real": -3.4539108276367188,
"step": 1820
},
{
"epoch": 1.18,
"learning_rate": 3.377767198286122e-07,
"logits/generated": -2.172501802444458,
"logits/real": -2.224087953567505,
"logps/generated": -577.8551025390625,
"logps/real": -396.276123046875,
"loss": 0.0083,
"rewards/accuracies": 1.0,
"rewards/generated": -19.138914108276367,
"rewards/margins": 15.50109577178955,
"rewards/real": -3.6378180980682373,
"step": 1830
},
{
"epoch": 1.18,
"learning_rate": 3.365865270173768e-07,
"logits/generated": -2.222236394882202,
"logits/real": -2.215315341949463,
"logps/generated": -606.78955078125,
"logps/real": -401.0457763671875,
"loss": 0.009,
"rewards/accuracies": 1.0,
"rewards/generated": -18.88642120361328,
"rewards/margins": 16.262500762939453,
"rewards/real": -2.6239190101623535,
"step": 1840
},
{
"epoch": 1.19,
"learning_rate": 3.353963342061414e-07,
"logits/generated": -2.0716452598571777,
"logits/real": -2.118318796157837,
"logps/generated": -649.42431640625,
"logps/real": -411.53338623046875,
"loss": 0.0088,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -20.20388412475586,
"rewards/margins": 17.11600112915039,
"rewards/real": -3.0878825187683105,
"step": 1850
},
{
"epoch": 1.2,
"learning_rate": 3.3420614139490593e-07,
"logits/generated": -2.0571255683898926,
"logits/real": -2.0999083518981934,
"logps/generated": -555.8407592773438,
"logps/real": -349.90155029296875,
"loss": 0.006,
"rewards/accuracies": 1.0,
"rewards/generated": -18.651325225830078,
"rewards/margins": 15.208650588989258,
"rewards/real": -3.442675828933716,
"step": 1860
},
{
"epoch": 1.2,
"learning_rate": 3.3301594858367057e-07,
"logits/generated": -2.0144572257995605,
"logits/real": -2.0585360527038574,
"logps/generated": -564.4594116210938,
"logps/real": -357.2227478027344,
"loss": 0.0066,
"rewards/accuracies": 1.0,
"rewards/generated": -20.891742706298828,
"rewards/margins": 17.29900550842285,
"rewards/real": -3.5927371978759766,
"step": 1870
},
{
"epoch": 1.21,
"learning_rate": 3.318257557724351e-07,
"logits/generated": -2.096843957901001,
"logits/real": -2.2019436359405518,
"logps/generated": -579.1746826171875,
"logps/real": -376.2278137207031,
"loss": 0.0111,
"rewards/accuracies": 1.0,
"rewards/generated": -20.490825653076172,
"rewards/margins": 15.950909614562988,
"rewards/real": -4.539917945861816,
"step": 1880
},
{
"epoch": 1.21,
"learning_rate": 3.306355629611997e-07,
"logits/generated": -2.2774910926818848,
"logits/real": -2.312415599822998,
"logps/generated": -589.5444946289062,
"logps/real": -355.139404296875,
"loss": 0.012,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -17.514019012451172,
"rewards/margins": 14.974294662475586,
"rewards/real": -2.5397236347198486,
"step": 1890
},
{
"epoch": 1.22,
"learning_rate": 3.294453701499643e-07,
"logits/generated": -2.2411015033721924,
"logits/real": -2.2287962436676025,
"logps/generated": -519.8386840820312,
"logps/real": -335.5692443847656,
"loss": 0.0235,
"rewards/accuracies": 1.0,
"rewards/generated": -16.851512908935547,
"rewards/margins": 13.842289924621582,
"rewards/real": -3.0092215538024902,
"step": 1900
},
{
"epoch": 1.23,
"learning_rate": 3.282551773387289e-07,
"logits/generated": -2.2905209064483643,
"logits/real": -2.3136816024780273,
"logps/generated": -624.5968017578125,
"logps/real": -403.45501708984375,
"loss": 0.0117,
"rewards/accuracies": 1.0,
"rewards/generated": -17.708858489990234,
"rewards/margins": 15.356656074523926,
"rewards/real": -2.352205276489258,
"step": 1910
},
{
"epoch": 1.23,
"learning_rate": 3.270649845274934e-07,
"logits/generated": -2.268571376800537,
"logits/real": -2.270794630050659,
"logps/generated": -538.6395263671875,
"logps/real": -346.6217346191406,
"loss": 0.0137,
"rewards/accuracies": 1.0,
"rewards/generated": -12.539231300354004,
"rewards/margins": 12.0559720993042,
"rewards/real": -0.4832596778869629,
"step": 1920
},
{
"epoch": 1.24,
"learning_rate": 3.2587479171625806e-07,
"logits/generated": -2.176980495452881,
"logits/real": -2.2080063819885254,
"logps/generated": -514.9129638671875,
"logps/real": -330.4537048339844,
"loss": 0.008,
"rewards/accuracies": 1.0,
"rewards/generated": -15.298626899719238,
"rewards/margins": 13.659899711608887,
"rewards/real": -1.6387275457382202,
"step": 1930
},
{
"epoch": 1.25,
"learning_rate": 3.246845989050226e-07,
"logits/generated": -2.1879403591156006,
"logits/real": -2.1978180408477783,
"logps/generated": -593.1455078125,
"logps/real": -375.1646423339844,
"loss": 0.0175,
"rewards/accuracies": 1.0,
"rewards/generated": -17.365800857543945,
"rewards/margins": 15.398488998413086,
"rewards/real": -1.967309594154358,
"step": 1940
},
{
"epoch": 1.25,
"learning_rate": 3.234944060937872e-07,
"logits/generated": -2.192488193511963,
"logits/real": -2.1515822410583496,
"logps/generated": -523.1942138671875,
"logps/real": -323.0399475097656,
"loss": 0.0176,
"rewards/accuracies": 1.0,
"rewards/generated": -17.100679397583008,
"rewards/margins": 14.689547538757324,
"rewards/real": -2.4111287593841553,
"step": 1950
},
{
"epoch": 1.26,
"learning_rate": 3.2230421328255177e-07,
"logits/generated": -2.1857666969299316,
"logits/real": -2.1973021030426025,
"logps/generated": -573.9412841796875,
"logps/real": -383.3498840332031,
"loss": 0.0079,
"rewards/accuracies": 1.0,
"rewards/generated": -15.325065612792969,
"rewards/margins": 13.42688274383545,
"rewards/real": -1.8981819152832031,
"step": 1960
},
{
"epoch": 1.27,
"learning_rate": 3.2111402047131636e-07,
"logits/generated": -2.1818251609802246,
"logits/real": -2.138362169265747,
"logps/generated": -582.6113891601562,
"logps/real": -393.2650146484375,
"loss": 0.0099,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -17.432369232177734,
"rewards/margins": 14.267339706420898,
"rewards/real": -3.1650280952453613,
"step": 1970
},
{
"epoch": 1.27,
"learning_rate": 3.199238276600809e-07,
"logits/generated": -2.1671457290649414,
"logits/real": -2.165565013885498,
"logps/generated": -562.9454345703125,
"logps/real": -394.97747802734375,
"loss": 0.0046,
"rewards/accuracies": 1.0,
"rewards/generated": -17.854740142822266,
"rewards/margins": 14.695347785949707,
"rewards/real": -3.159393072128296,
"step": 1980
},
{
"epoch": 1.28,
"learning_rate": 3.1873363484884554e-07,
"logits/generated": -2.1330654621124268,
"logits/real": -2.1521830558776855,
"logps/generated": -639.1904296875,
"logps/real": -405.31842041015625,
"loss": 0.0046,
"rewards/accuracies": 1.0,
"rewards/generated": -20.329586029052734,
"rewards/margins": 17.111438751220703,
"rewards/real": -3.218146800994873,
"step": 1990
},
{
"epoch": 1.29,
"learning_rate": 3.175434420376101e-07,
"logits/generated": -2.189150333404541,
"logits/real": -2.256707191467285,
"logps/generated": -564.0371704101562,
"logps/real": -355.9576110839844,
"loss": 0.0053,
"rewards/accuracies": 1.0,
"rewards/generated": -19.11702537536621,
"rewards/margins": 15.930302619934082,
"rewards/real": -3.186723232269287,
"step": 2000
},
{
"epoch": 1.29,
"learning_rate": 3.1635324922637467e-07,
"logits/generated": -2.059345245361328,
"logits/real": -2.0931825637817383,
"logps/generated": -592.4450073242188,
"logps/real": -353.2586975097656,
"loss": 0.0079,
"rewards/accuracies": 1.0,
"rewards/generated": -19.638408660888672,
"rewards/margins": 15.883349418640137,
"rewards/real": -3.7550551891326904,
"step": 2010
},
{
"epoch": 1.3,
"learning_rate": 3.1516305641513926e-07,
"logits/generated": -2.092940092086792,
"logits/real": -2.1705400943756104,
"logps/generated": -628.2099609375,
"logps/real": -390.40728759765625,
"loss": 0.0169,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -20.64360237121582,
"rewards/margins": 16.640026092529297,
"rewards/real": -4.003575801849365,
"step": 2020
},
{
"epoch": 1.3,
"learning_rate": 3.139728636039038e-07,
"logits/generated": -1.9676166772842407,
"logits/real": -2.0171456336975098,
"logps/generated": -546.7244873046875,
"logps/real": -362.56488037109375,
"loss": 0.0139,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.221649169921875,
"rewards/margins": 15.068666458129883,
"rewards/real": -4.152983665466309,
"step": 2030
},
{
"epoch": 1.31,
"learning_rate": 3.127826707926684e-07,
"logits/generated": -2.1424899101257324,
"logits/real": -2.1512458324432373,
"logps/generated": -566.885986328125,
"logps/real": -319.979736328125,
"loss": 0.0091,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.882003784179688,
"rewards/margins": 15.76966667175293,
"rewards/real": -3.112335205078125,
"step": 2040
},
{
"epoch": 1.32,
"learning_rate": 3.11592477981433e-07,
"logits/generated": -2.140007495880127,
"logits/real": -2.138995409011841,
"logps/generated": -605.3294677734375,
"logps/real": -381.41290283203125,
"loss": 0.004,
"rewards/accuracies": 1.0,
"rewards/generated": -18.86813735961914,
"rewards/margins": 14.63634967803955,
"rewards/real": -4.231788158416748,
"step": 2050
},
{
"epoch": 1.32,
"learning_rate": 3.1040228517019756e-07,
"logits/generated": -2.1069555282592773,
"logits/real": -2.0523293018341064,
"logps/generated": -636.1548461914062,
"logps/real": -395.80914306640625,
"loss": 0.0122,
"rewards/accuracies": 1.0,
"rewards/generated": -21.208894729614258,
"rewards/margins": 18.558944702148438,
"rewards/real": -2.649949550628662,
"step": 2060
},
{
"epoch": 1.33,
"learning_rate": 3.092120923589621e-07,
"logits/generated": -2.1070761680603027,
"logits/real": -2.1584606170654297,
"logps/generated": -615.450439453125,
"logps/real": -398.21844482421875,
"loss": 0.0294,
"rewards/accuracies": 1.0,
"rewards/generated": -19.43939208984375,
"rewards/margins": 14.954734802246094,
"rewards/real": -4.484656810760498,
"step": 2070
},
{
"epoch": 1.34,
"learning_rate": 3.0802189954772674e-07,
"logits/generated": -2.0256807804107666,
"logits/real": -2.057490825653076,
"logps/generated": -540.1599731445312,
"logps/real": -362.2354431152344,
"loss": 0.0277,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.157804489135742,
"rewards/margins": 14.374547004699707,
"rewards/real": -3.7832565307617188,
"step": 2080
},
{
"epoch": 1.34,
"learning_rate": 3.068317067364913e-07,
"logits/generated": -2.0899441242218018,
"logits/real": -2.1068785190582275,
"logps/generated": -626.4033813476562,
"logps/real": -453.5624084472656,
"loss": 0.0165,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -17.779939651489258,
"rewards/margins": 14.692364692687988,
"rewards/real": -3.0875754356384277,
"step": 2090
},
{
"epoch": 1.35,
"learning_rate": 3.0564151392525587e-07,
"logits/generated": -1.9991929531097412,
"logits/real": -2.0575671195983887,
"logps/generated": -572.4451904296875,
"logps/real": -387.9151306152344,
"loss": 0.0106,
"rewards/accuracies": 1.0,
"rewards/generated": -18.603885650634766,
"rewards/margins": 14.808444023132324,
"rewards/real": -3.7954421043395996,
"step": 2100
},
{
"epoch": 1.36,
"learning_rate": 3.0445132111402046e-07,
"logits/generated": -1.957606315612793,
"logits/real": -2.0042033195495605,
"logps/generated": -491.47344970703125,
"logps/real": -340.8980712890625,
"loss": 0.0073,
"rewards/accuracies": 1.0,
"rewards/generated": -16.086580276489258,
"rewards/margins": 13.114072799682617,
"rewards/real": -2.9725046157836914,
"step": 2110
},
{
"epoch": 1.36,
"learning_rate": 3.0326112830278505e-07,
"logits/generated": -1.9950910806655884,
"logits/real": -2.022733211517334,
"logps/generated": -572.0457763671875,
"logps/real": -401.0966796875,
"loss": 0.0137,
"rewards/accuracies": 1.0,
"rewards/generated": -18.224430084228516,
"rewards/margins": 15.097747802734375,
"rewards/real": -3.126683473587036,
"step": 2120
},
{
"epoch": 1.37,
"learning_rate": 3.020709354915496e-07,
"logits/generated": -1.8846557140350342,
"logits/real": -2.030510187149048,
"logps/generated": -571.71337890625,
"logps/real": -432.107421875,
"loss": 0.0059,
"rewards/accuracies": 1.0,
"rewards/generated": -17.2412166595459,
"rewards/margins": 14.449191093444824,
"rewards/real": -2.7920241355895996,
"step": 2130
},
{
"epoch": 1.38,
"learning_rate": 3.0088074268031423e-07,
"logits/generated": -1.9635789394378662,
"logits/real": -2.010420322418213,
"logps/generated": -595.8865966796875,
"logps/real": -395.47271728515625,
"loss": 0.0165,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -16.31340217590332,
"rewards/margins": 13.558954238891602,
"rewards/real": -2.754448175430298,
"step": 2140
},
{
"epoch": 1.38,
"learning_rate": 2.9969054986907876e-07,
"logits/generated": -1.8813692331314087,
"logits/real": -1.973508596420288,
"logps/generated": -504.12823486328125,
"logps/real": -347.0367736816406,
"loss": 0.0036,
"rewards/accuracies": 1.0,
"rewards/generated": -16.611045837402344,
"rewards/margins": 14.725499153137207,
"rewards/real": -1.8855485916137695,
"step": 2150
},
{
"epoch": 1.39,
"learning_rate": 2.9850035705784335e-07,
"logits/generated": -1.9549331665039062,
"logits/real": -2.0346851348876953,
"logps/generated": -609.5227661132812,
"logps/real": -419.77618408203125,
"loss": 0.0101,
"rewards/accuracies": 1.0,
"rewards/generated": -17.863967895507812,
"rewards/margins": 14.792566299438477,
"rewards/real": -3.071401357650757,
"step": 2160
},
{
"epoch": 1.39,
"learning_rate": 2.9731016424660794e-07,
"logits/generated": -1.9871995449066162,
"logits/real": -1.9926321506500244,
"logps/generated": -571.4608764648438,
"logps/real": -346.6838684082031,
"loss": 0.0126,
"rewards/accuracies": 1.0,
"rewards/generated": -16.64493751525879,
"rewards/margins": 14.440625190734863,
"rewards/real": -2.2043120861053467,
"step": 2170
},
{
"epoch": 1.4,
"learning_rate": 2.9611997143537253e-07,
"logits/generated": -1.995072603225708,
"logits/real": -1.9926702976226807,
"logps/generated": -639.6512451171875,
"logps/real": -369.5633239746094,
"loss": 0.0106,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.795745849609375,
"rewards/margins": 15.904996871948242,
"rewards/real": -2.890749454498291,
"step": 2180
},
{
"epoch": 1.41,
"learning_rate": 2.9492977862413707e-07,
"logits/generated": -2.0281620025634766,
"logits/real": -1.9819482564926147,
"logps/generated": -570.8590698242188,
"logps/real": -378.61474609375,
"loss": 0.0159,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -17.853015899658203,
"rewards/margins": 13.683568000793457,
"rewards/real": -4.169447898864746,
"step": 2190
},
{
"epoch": 1.41,
"learning_rate": 2.937395858129017e-07,
"logits/generated": -1.9663385152816772,
"logits/real": -2.0112898349761963,
"logps/generated": -612.8561401367188,
"logps/real": -383.02532958984375,
"loss": 0.017,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.021343231201172,
"rewards/margins": 16.094024658203125,
"rewards/real": -2.927319288253784,
"step": 2200
},
{
"epoch": 1.42,
"learning_rate": 2.9254939300166625e-07,
"logits/generated": -1.9601085186004639,
"logits/real": -2.0084640979766846,
"logps/generated": -595.7770385742188,
"logps/real": -402.1640319824219,
"loss": 0.0046,
"rewards/accuracies": 1.0,
"rewards/generated": -18.758970260620117,
"rewards/margins": 15.546884536743164,
"rewards/real": -3.2120864391326904,
"step": 2210
},
{
"epoch": 1.43,
"learning_rate": 2.913592001904308e-07,
"logits/generated": -1.959242820739746,
"logits/real": -2.011341094970703,
"logps/generated": -582.9662475585938,
"logps/real": -326.1315002441406,
"loss": 0.0066,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.912372589111328,
"rewards/margins": 15.57469654083252,
"rewards/real": -3.3376784324645996,
"step": 2220
},
{
"epoch": 1.43,
"learning_rate": 2.9016900737919543e-07,
"logits/generated": -1.8837999105453491,
"logits/real": -2.0170726776123047,
"logps/generated": -615.4156494140625,
"logps/real": -384.8844299316406,
"loss": 0.008,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.660049438476562,
"rewards/margins": 15.770927429199219,
"rewards/real": -2.8891234397888184,
"step": 2230
},
{
"epoch": 1.44,
"learning_rate": 2.8897881456795997e-07,
"logits/generated": -1.8241097927093506,
"logits/real": -1.7869752645492554,
"logps/generated": -593.7149047851562,
"logps/real": -370.52789306640625,
"loss": 0.0204,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.744731903076172,
"rewards/margins": 15.868573188781738,
"rewards/real": -2.8761584758758545,
"step": 2240
},
{
"epoch": 1.45,
"learning_rate": 2.8778862175672456e-07,
"logits/generated": -1.7767345905303955,
"logits/real": -1.8023865222930908,
"logps/generated": -588.9293823242188,
"logps/real": -392.6181640625,
"loss": 0.0119,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.669166564941406,
"rewards/margins": 16.322322845458984,
"rewards/real": -2.346843957901001,
"step": 2250
},
{
"epoch": 1.45,
"learning_rate": 2.8659842894548915e-07,
"logits/generated": -1.7413629293441772,
"logits/real": -1.7947721481323242,
"logps/generated": -632.8668212890625,
"logps/real": -339.634765625,
"loss": 0.0052,
"rewards/accuracies": 1.0,
"rewards/generated": -21.01340103149414,
"rewards/margins": 17.00638771057129,
"rewards/real": -4.007015228271484,
"step": 2260
},
{
"epoch": 1.46,
"learning_rate": 2.8540823613425374e-07,
"logits/generated": -1.7241382598876953,
"logits/real": -1.8351795673370361,
"logps/generated": -553.7213134765625,
"logps/real": -371.4381408691406,
"loss": 0.0044,
"rewards/accuracies": 1.0,
"rewards/generated": -18.440507888793945,
"rewards/margins": 13.914960861206055,
"rewards/real": -4.525545597076416,
"step": 2270
},
{
"epoch": 1.47,
"learning_rate": 2.8421804332301827e-07,
"logits/generated": -1.7572247982025146,
"logits/real": -1.8531087636947632,
"logps/generated": -680.1027221679688,
"logps/real": -414.3067321777344,
"loss": 0.0058,
"rewards/accuracies": 1.0,
"rewards/generated": -21.988712310791016,
"rewards/margins": 17.40224266052246,
"rewards/real": -4.586469650268555,
"step": 2280
},
{
"epoch": 1.47,
"learning_rate": 2.830278505117829e-07,
"logits/generated": -1.7346735000610352,
"logits/real": -1.8323335647583008,
"logps/generated": -684.58642578125,
"logps/real": -440.241943359375,
"loss": 0.0039,
"rewards/accuracies": 1.0,
"rewards/generated": -23.51043128967285,
"rewards/margins": 17.93229103088379,
"rewards/real": -5.57813835144043,
"step": 2290
},
{
"epoch": 1.48,
"learning_rate": 2.8183765770054745e-07,
"logits/generated": -1.6866531372070312,
"logits/real": -1.7377674579620361,
"logps/generated": -626.2985229492188,
"logps/real": -398.7217102050781,
"loss": 0.0209,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -20.974733352661133,
"rewards/margins": 15.526639938354492,
"rewards/real": -5.448093414306641,
"step": 2300
},
{
"epoch": 1.48,
"learning_rate": 2.806474648893121e-07,
"logits/generated": -1.7734663486480713,
"logits/real": -1.852805733680725,
"logps/generated": -599.9769897460938,
"logps/real": -384.3834228515625,
"loss": 0.0074,
"rewards/accuracies": 1.0,
"rewards/generated": -23.156208038330078,
"rewards/margins": 17.898412704467773,
"rewards/real": -5.257795810699463,
"step": 2310
},
{
"epoch": 1.49,
"learning_rate": 2.7945727207807663e-07,
"logits/generated": -1.704843282699585,
"logits/real": -1.8438117504119873,
"logps/generated": -671.08935546875,
"logps/real": -363.78399658203125,
"loss": 0.0046,
"rewards/accuracies": 1.0,
"rewards/generated": -23.627620697021484,
"rewards/margins": 18.943744659423828,
"rewards/real": -4.6838765144348145,
"step": 2320
},
{
"epoch": 1.5,
"learning_rate": 2.782670792668412e-07,
"logits/generated": -1.8488250970840454,
"logits/real": -1.937170386314392,
"logps/generated": -617.1664428710938,
"logps/real": -413.4002990722656,
"loss": 0.0137,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -21.6809024810791,
"rewards/margins": 16.090904235839844,
"rewards/real": -5.5899977684021,
"step": 2330
},
{
"epoch": 1.5,
"learning_rate": 2.770768864556058e-07,
"logits/generated": -1.7172123193740845,
"logits/real": -1.8333232402801514,
"logps/generated": -608.66943359375,
"logps/real": -358.6962890625,
"loss": 0.0099,
"rewards/accuracies": 1.0,
"rewards/generated": -22.130813598632812,
"rewards/margins": 17.398597717285156,
"rewards/real": -4.732216835021973,
"step": 2340
},
{
"epoch": 1.51,
"learning_rate": 2.758866936443704e-07,
"logits/generated": -1.7257283926010132,
"logits/real": -1.8596134185791016,
"logps/generated": -620.2059936523438,
"logps/real": -376.4665222167969,
"loss": 0.006,
"rewards/accuracies": 1.0,
"rewards/generated": -22.570621490478516,
"rewards/margins": 18.450403213500977,
"rewards/real": -4.120217323303223,
"step": 2350
},
{
"epoch": 1.52,
"learning_rate": 2.7469650083313494e-07,
"logits/generated": -1.8226665258407593,
"logits/real": -1.9213998317718506,
"logps/generated": -523.0108642578125,
"logps/real": -348.8974609375,
"loss": 0.0208,
"rewards/accuracies": 1.0,
"rewards/generated": -18.249343872070312,
"rewards/margins": 14.304719924926758,
"rewards/real": -3.9446239471435547,
"step": 2360
},
{
"epoch": 1.52,
"learning_rate": 2.735063080218996e-07,
"logits/generated": -1.7809902429580688,
"logits/real": -1.861524224281311,
"logps/generated": -596.6590576171875,
"logps/real": -363.1610107421875,
"loss": 0.0053,
"rewards/accuracies": 1.0,
"rewards/generated": -19.3269100189209,
"rewards/margins": 17.15434455871582,
"rewards/real": -2.1725666522979736,
"step": 2370
},
{
"epoch": 1.53,
"learning_rate": 2.723161152106641e-07,
"logits/generated": -1.6831693649291992,
"logits/real": -1.7770426273345947,
"logps/generated": -556.0060424804688,
"logps/real": -342.6446838378906,
"loss": 0.0118,
"rewards/accuracies": 1.0,
"rewards/generated": -19.728736877441406,
"rewards/margins": 15.71430492401123,
"rewards/real": -4.01443338394165,
"step": 2380
},
{
"epoch": 1.54,
"learning_rate": 2.711259223994287e-07,
"logits/generated": -1.7193806171417236,
"logits/real": -1.7846571207046509,
"logps/generated": -513.1950073242188,
"logps/real": -321.84466552734375,
"loss": 0.0152,
"rewards/accuracies": 1.0,
"rewards/generated": -18.99886131286621,
"rewards/margins": 14.975049018859863,
"rewards/real": -4.023811340332031,
"step": 2390
},
{
"epoch": 1.54,
"learning_rate": 2.699357295881933e-07,
"logits/generated": -1.696411371231079,
"logits/real": -1.8342005014419556,
"logps/generated": -525.8536376953125,
"logps/real": -306.38720703125,
"loss": 0.0173,
"rewards/accuracies": 1.0,
"rewards/generated": -17.83527374267578,
"rewards/margins": 13.64979076385498,
"rewards/real": -4.185482978820801,
"step": 2400
},
{
"epoch": 1.55,
"learning_rate": 2.687455367769579e-07,
"logits/generated": -1.777260184288025,
"logits/real": -1.8473188877105713,
"logps/generated": -579.9354248046875,
"logps/real": -359.2183532714844,
"loss": 0.0358,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -16.67335319519043,
"rewards/margins": 13.895036697387695,
"rewards/real": -2.77831768989563,
"step": 2410
},
{
"epoch": 1.56,
"learning_rate": 2.675553439657224e-07,
"logits/generated": -1.7697776556015015,
"logits/real": -1.8199243545532227,
"logps/generated": -516.1290893554688,
"logps/real": -342.9108581542969,
"loss": 0.0067,
"rewards/accuracies": 1.0,
"rewards/generated": -15.577775955200195,
"rewards/margins": 13.427032470703125,
"rewards/real": -2.150745391845703,
"step": 2420
},
{
"epoch": 1.56,
"learning_rate": 2.66365151154487e-07,
"logits/generated": -1.768214464187622,
"logits/real": -1.8642857074737549,
"logps/generated": -514.9273681640625,
"logps/real": -360.794677734375,
"loss": 0.0057,
"rewards/accuracies": 1.0,
"rewards/generated": -16.164379119873047,
"rewards/margins": 13.772100448608398,
"rewards/real": -2.3922770023345947,
"step": 2430
},
{
"epoch": 1.57,
"learning_rate": 2.651749583432516e-07,
"logits/generated": -1.8366386890411377,
"logits/real": -1.8158714771270752,
"logps/generated": -596.3200073242188,
"logps/real": -362.0704650878906,
"loss": 0.0089,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -17.17621421813965,
"rewards/margins": 14.673547744750977,
"rewards/real": -2.5026676654815674,
"step": 2440
},
{
"epoch": 1.57,
"learning_rate": 2.6398476553201614e-07,
"logits/generated": -1.715486764907837,
"logits/real": -1.7608009576797485,
"logps/generated": -592.689208984375,
"logps/real": -394.7555236816406,
"loss": 0.0177,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.238231658935547,
"rewards/margins": 15.920003890991211,
"rewards/real": -2.318227767944336,
"step": 2450
},
{
"epoch": 1.58,
"learning_rate": 2.627945727207808e-07,
"logits/generated": -1.8307558298110962,
"logits/real": -1.8600902557373047,
"logps/generated": -560.46142578125,
"logps/real": -370.09747314453125,
"loss": 0.0095,
"rewards/accuracies": 1.0,
"rewards/generated": -18.495569229125977,
"rewards/margins": 14.71654987335205,
"rewards/real": -3.7790215015411377,
"step": 2460
},
{
"epoch": 1.59,
"learning_rate": 2.616043799095453e-07,
"logits/generated": -1.7307708263397217,
"logits/real": -1.7592779397964478,
"logps/generated": -584.9305419921875,
"logps/real": -359.0640869140625,
"loss": 0.0073,
"rewards/accuracies": 1.0,
"rewards/generated": -19.275348663330078,
"rewards/margins": 14.876652717590332,
"rewards/real": -4.398694038391113,
"step": 2470
},
{
"epoch": 1.59,
"learning_rate": 2.604141870983099e-07,
"logits/generated": -1.702275037765503,
"logits/real": -1.7664591073989868,
"logps/generated": -556.9425048828125,
"logps/real": -366.8645935058594,
"loss": 0.0077,
"rewards/accuracies": 1.0,
"rewards/generated": -19.035137176513672,
"rewards/margins": 15.076634407043457,
"rewards/real": -3.9585037231445312,
"step": 2480
},
{
"epoch": 1.6,
"learning_rate": 2.592239942870745e-07,
"logits/generated": -1.8040691614151,
"logits/real": -1.831241250038147,
"logps/generated": -605.1649780273438,
"logps/real": -383.25506591796875,
"loss": 0.0355,
"rewards/accuracies": 1.0,
"rewards/generated": -18.789392471313477,
"rewards/margins": 15.085497856140137,
"rewards/real": -3.7038941383361816,
"step": 2490
},
{
"epoch": 1.61,
"learning_rate": 2.580338014758391e-07,
"logits/generated": -1.9082372188568115,
"logits/real": -1.964238166809082,
"logps/generated": -597.7811889648438,
"logps/real": -401.4377746582031,
"loss": 0.0061,
"rewards/accuracies": 1.0,
"rewards/generated": -18.443161010742188,
"rewards/margins": 15.85346794128418,
"rewards/real": -2.589694023132324,
"step": 2500
},
{
"epoch": 1.61,
"learning_rate": 2.568436086646036e-07,
"logits/generated": -1.999436616897583,
"logits/real": -1.951202154159546,
"logps/generated": -578.76806640625,
"logps/real": -361.1490478515625,
"loss": 0.0065,
"rewards/accuracies": 1.0,
"rewards/generated": -17.621252059936523,
"rewards/margins": 13.709065437316895,
"rewards/real": -3.9121880531311035,
"step": 2510
},
{
"epoch": 1.62,
"learning_rate": 2.5565341585336827e-07,
"logits/generated": -1.8233203887939453,
"logits/real": -1.8996546268463135,
"logps/generated": -575.7025756835938,
"logps/real": -398.1545715332031,
"loss": 0.0066,
"rewards/accuracies": 1.0,
"rewards/generated": -18.55044174194336,
"rewards/margins": 15.433720588684082,
"rewards/real": -3.1167218685150146,
"step": 2520
},
{
"epoch": 1.63,
"learning_rate": 2.544632230421328e-07,
"logits/generated": -1.8319776058197021,
"logits/real": -1.9016917943954468,
"logps/generated": -609.1539306640625,
"logps/real": -345.43536376953125,
"loss": 0.0132,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.780593872070312,
"rewards/margins": 17.442068099975586,
"rewards/real": -2.338524341583252,
"step": 2530
},
{
"epoch": 1.63,
"learning_rate": 2.532730302308974e-07,
"logits/generated": -1.9279365539550781,
"logits/real": -1.9061311483383179,
"logps/generated": -566.6038818359375,
"logps/real": -330.6295166015625,
"loss": 0.0103,
"rewards/accuracies": 1.0,
"rewards/generated": -19.055160522460938,
"rewards/margins": 15.601313591003418,
"rewards/real": -3.4538471698760986,
"step": 2540
},
{
"epoch": 1.64,
"learning_rate": 2.52082837419662e-07,
"logits/generated": -1.8530943393707275,
"logits/real": -1.919136643409729,
"logps/generated": -604.7074584960938,
"logps/real": -362.3363342285156,
"loss": 0.0112,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.198497772216797,
"rewards/margins": 16.335779190063477,
"rewards/real": -2.8627171516418457,
"step": 2550
},
{
"epoch": 1.65,
"learning_rate": 2.5089264460842657e-07,
"logits/generated": -1.953850507736206,
"logits/real": -1.872309923171997,
"logps/generated": -618.9617309570312,
"logps/real": -378.0514831542969,
"loss": 0.0201,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.868818283081055,
"rewards/margins": 15.176447868347168,
"rewards/real": -3.692370653152466,
"step": 2560
},
{
"epoch": 1.65,
"learning_rate": 2.497024517971911e-07,
"logits/generated": -1.8568464517593384,
"logits/real": -1.887955665588379,
"logps/generated": -605.1824340820312,
"logps/real": -405.60235595703125,
"loss": 0.0013,
"rewards/accuracies": 1.0,
"rewards/generated": -18.905994415283203,
"rewards/margins": 15.54051685333252,
"rewards/real": -3.3654770851135254,
"step": 2570
},
{
"epoch": 1.66,
"learning_rate": 2.485122589859557e-07,
"logits/generated": -1.8930320739746094,
"logits/real": -1.8917458057403564,
"logps/generated": -584.1777954101562,
"logps/real": -337.369384765625,
"loss": 0.0201,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -21.138484954833984,
"rewards/margins": 17.352680206298828,
"rewards/real": -3.785806179046631,
"step": 2580
},
{
"epoch": 1.66,
"learning_rate": 2.473220661747203e-07,
"logits/generated": -1.8552837371826172,
"logits/real": -1.9193109273910522,
"logps/generated": -608.6238403320312,
"logps/real": -370.24603271484375,
"loss": 0.0077,
"rewards/accuracies": 1.0,
"rewards/generated": -19.182647705078125,
"rewards/margins": 16.14703369140625,
"rewards/real": -3.035613536834717,
"step": 2590
},
{
"epoch": 1.67,
"learning_rate": 2.461318733634849e-07,
"logits/generated": -1.7766849994659424,
"logits/real": -1.8722776174545288,
"logps/generated": -549.0695190429688,
"logps/real": -349.0986022949219,
"loss": 0.013,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.278690338134766,
"rewards/margins": 16.906200408935547,
"rewards/real": -2.3724896907806396,
"step": 2600
},
{
"epoch": 1.68,
"learning_rate": 2.4494168055224947e-07,
"logits/generated": -1.9358885288238525,
"logits/real": -1.9020036458969116,
"logps/generated": -619.1986694335938,
"logps/real": -427.21258544921875,
"loss": 0.0078,
"rewards/accuracies": 1.0,
"rewards/generated": -18.58693504333496,
"rewards/margins": 15.541351318359375,
"rewards/real": -3.0455851554870605,
"step": 2610
},
{
"epoch": 1.68,
"learning_rate": 2.4375148774101406e-07,
"logits/generated": -1.799574613571167,
"logits/real": -1.8969202041625977,
"logps/generated": -522.123046875,
"logps/real": -333.136474609375,
"loss": 0.0125,
"rewards/accuracies": 1.0,
"rewards/generated": -18.633255004882812,
"rewards/margins": 15.599235534667969,
"rewards/real": -3.0340187549591064,
"step": 2620
},
{
"epoch": 1.69,
"learning_rate": 2.425612949297786e-07,
"logits/generated": -1.8291136026382446,
"logits/real": -1.8533122539520264,
"logps/generated": -593.2408447265625,
"logps/real": -402.7211608886719,
"loss": 0.0054,
"rewards/accuracies": 1.0,
"rewards/generated": -19.01093101501465,
"rewards/margins": 15.119882583618164,
"rewards/real": -3.8910484313964844,
"step": 2630
},
{
"epoch": 1.7,
"learning_rate": 2.413711021185432e-07,
"logits/generated": -1.8732038736343384,
"logits/real": -1.8730173110961914,
"logps/generated": -600.4080810546875,
"logps/real": -323.9451904296875,
"loss": 0.0105,
"rewards/accuracies": 1.0,
"rewards/generated": -20.80618667602539,
"rewards/margins": 17.078380584716797,
"rewards/real": -3.7278037071228027,
"step": 2640
},
{
"epoch": 1.7,
"learning_rate": 2.401809093073078e-07,
"logits/generated": -1.8166701793670654,
"logits/real": -1.8924024105072021,
"logps/generated": -576.9189453125,
"logps/real": -372.4144592285156,
"loss": 0.0049,
"rewards/accuracies": 1.0,
"rewards/generated": -19.3643798828125,
"rewards/margins": 15.9496488571167,
"rewards/real": -3.4147305488586426,
"step": 2650
},
{
"epoch": 1.71,
"learning_rate": 2.3899071649607236e-07,
"logits/generated": -1.8222957849502563,
"logits/real": -1.7936245203018188,
"logps/generated": -639.0947265625,
"logps/real": -437.8866271972656,
"loss": 0.012,
"rewards/accuracies": 1.0,
"rewards/generated": -18.86641502380371,
"rewards/margins": 15.062356948852539,
"rewards/real": -3.8040592670440674,
"step": 2660
},
{
"epoch": 1.72,
"learning_rate": 2.3780052368483693e-07,
"logits/generated": -1.6525121927261353,
"logits/real": -1.7323639392852783,
"logps/generated": -528.6134033203125,
"logps/real": -337.75048828125,
"loss": 0.0086,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -20.23191261291504,
"rewards/margins": 15.418153762817383,
"rewards/real": -4.813759803771973,
"step": 2670
},
{
"epoch": 1.72,
"learning_rate": 2.3661033087360152e-07,
"logits/generated": -1.8742326498031616,
"logits/real": -1.941476583480835,
"logps/generated": -597.1447143554688,
"logps/real": -358.0196838378906,
"loss": 0.0121,
"rewards/accuracies": 1.0,
"rewards/generated": -18.783157348632812,
"rewards/margins": 15.727224349975586,
"rewards/real": -3.055934190750122,
"step": 2680
},
{
"epoch": 1.73,
"learning_rate": 2.354201380623661e-07,
"logits/generated": -1.8624379634857178,
"logits/real": -1.893402099609375,
"logps/generated": -629.6782836914062,
"logps/real": -406.6865234375,
"loss": 0.0066,
"rewards/accuracies": 1.0,
"rewards/generated": -20.850439071655273,
"rewards/margins": 16.49611473083496,
"rewards/real": -4.35432243347168,
"step": 2690
},
{
"epoch": 1.74,
"learning_rate": 2.342299452511307e-07,
"logits/generated": -1.8548250198364258,
"logits/real": -1.8583017587661743,
"logps/generated": -582.7992553710938,
"logps/real": -323.04205322265625,
"loss": 0.0047,
"rewards/accuracies": 1.0,
"rewards/generated": -22.348865509033203,
"rewards/margins": 18.243560791015625,
"rewards/real": -4.105301856994629,
"step": 2700
},
{
"epoch": 1.74,
"learning_rate": 2.3303975243989526e-07,
"logits/generated": -1.9024966955184937,
"logits/real": -2.017508029937744,
"logps/generated": -574.1644287109375,
"logps/real": -349.75714111328125,
"loss": 0.03,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -20.862314224243164,
"rewards/margins": 17.064558029174805,
"rewards/real": -3.7977538108825684,
"step": 2710
},
{
"epoch": 1.75,
"learning_rate": 2.3184955962865982e-07,
"logits/generated": -1.9994853734970093,
"logits/real": -2.0445475578308105,
"logps/generated": -562.1621704101562,
"logps/real": -349.68695068359375,
"loss": 0.0202,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -21.807878494262695,
"rewards/margins": 17.700210571289062,
"rewards/real": -4.107669353485107,
"step": 2720
},
{
"epoch": 1.75,
"learning_rate": 2.306593668174244e-07,
"logits/generated": -1.9293378591537476,
"logits/real": -2.0241332054138184,
"logps/generated": -584.3340454101562,
"logps/real": -374.9560546875,
"loss": 0.0256,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -19.921669006347656,
"rewards/margins": 16.468935012817383,
"rewards/real": -3.452733278274536,
"step": 2730
},
{
"epoch": 1.76,
"learning_rate": 2.2946917400618898e-07,
"logits/generated": -1.894095778465271,
"logits/real": -1.973131775856018,
"logps/generated": -621.0687866210938,
"logps/real": -442.6654357910156,
"loss": 0.0073,
"rewards/accuracies": 1.0,
"rewards/generated": -21.7646427154541,
"rewards/margins": 17.236825942993164,
"rewards/real": -4.527815818786621,
"step": 2740
},
{
"epoch": 1.77,
"learning_rate": 2.2827898119495357e-07,
"logits/generated": -1.908696174621582,
"logits/real": -1.8996613025665283,
"logps/generated": -641.0946044921875,
"logps/real": -396.1679992675781,
"loss": 0.0032,
"rewards/accuracies": 1.0,
"rewards/generated": -22.93410873413086,
"rewards/margins": 18.131559371948242,
"rewards/real": -4.802548885345459,
"step": 2750
},
{
"epoch": 1.77,
"learning_rate": 2.2708878838371816e-07,
"logits/generated": -1.887770414352417,
"logits/real": -1.9259475469589233,
"logps/generated": -641.0634765625,
"logps/real": -353.3055419921875,
"loss": 0.0061,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -22.32883071899414,
"rewards/margins": 18.148107528686523,
"rewards/real": -4.18071985244751,
"step": 2760
},
{
"epoch": 1.78,
"learning_rate": 2.2589859557248272e-07,
"logits/generated": -1.931406021118164,
"logits/real": -1.9512712955474854,
"logps/generated": -613.9124755859375,
"logps/real": -410.6527404785156,
"loss": 0.0061,
"rewards/accuracies": 1.0,
"rewards/generated": -21.747358322143555,
"rewards/margins": 16.431184768676758,
"rewards/real": -5.31617546081543,
"step": 2770
},
{
"epoch": 1.79,
"learning_rate": 2.247084027612473e-07,
"logits/generated": -1.9454374313354492,
"logits/real": -1.9846471548080444,
"logps/generated": -613.2532348632812,
"logps/real": -419.8189392089844,
"loss": 0.0369,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -22.502111434936523,
"rewards/margins": 19.032825469970703,
"rewards/real": -3.4692866802215576,
"step": 2780
},
{
"epoch": 1.79,
"learning_rate": 2.235182099500119e-07,
"logits/generated": -1.943305253982544,
"logits/real": -1.9613683223724365,
"logps/generated": -590.8385620117188,
"logps/real": -376.8114013671875,
"loss": 0.0201,
"rewards/accuracies": 1.0,
"rewards/generated": -18.899354934692383,
"rewards/margins": 15.790725708007812,
"rewards/real": -3.108628273010254,
"step": 2790
},
{
"epoch": 1.8,
"learning_rate": 2.2232801713877646e-07,
"logits/generated": -1.969151258468628,
"logits/real": -2.0842716693878174,
"logps/generated": -567.1388549804688,
"logps/real": -386.68927001953125,
"loss": 0.0082,
"rewards/accuracies": 1.0,
"rewards/generated": -19.092025756835938,
"rewards/margins": 14.683004379272461,
"rewards/real": -4.409019947052002,
"step": 2800
},
{
"epoch": 1.81,
"learning_rate": 2.2113782432754105e-07,
"logits/generated": -2.0907585620880127,
"logits/real": -2.102865219116211,
"logps/generated": -599.5132446289062,
"logps/real": -384.66351318359375,
"loss": 0.0765,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -20.033435821533203,
"rewards/margins": 15.8507080078125,
"rewards/real": -4.182726860046387,
"step": 2810
},
{
"epoch": 1.81,
"learning_rate": 2.1994763151630564e-07,
"logits/generated": -2.1723508834838867,
"logits/real": -2.1705594062805176,
"logps/generated": -512.1658935546875,
"logps/real": -349.0197448730469,
"loss": 0.014,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -16.58482551574707,
"rewards/margins": 14.868623733520508,
"rewards/real": -1.716202735900879,
"step": 2820
},
{
"epoch": 1.82,
"learning_rate": 2.187574387050702e-07,
"logits/generated": -2.1666464805603027,
"logits/real": -2.1792376041412354,
"logps/generated": -632.5197143554688,
"logps/real": -366.4725646972656,
"loss": 0.0058,
"rewards/accuracies": 1.0,
"rewards/generated": -18.97044563293457,
"rewards/margins": 16.773412704467773,
"rewards/real": -2.1970319747924805,
"step": 2830
},
{
"epoch": 1.83,
"learning_rate": 2.175672458938348e-07,
"logits/generated": -2.1748318672180176,
"logits/real": -2.1950387954711914,
"logps/generated": -549.257568359375,
"logps/real": -374.5805969238281,
"loss": 0.0105,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.554750442504883,
"rewards/margins": 15.931724548339844,
"rewards/real": -2.6230263710021973,
"step": 2840
},
{
"epoch": 1.83,
"learning_rate": 2.1637705308259938e-07,
"logits/generated": -2.2087433338165283,
"logits/real": -2.2343385219573975,
"logps/generated": -569.7178344726562,
"logps/real": -363.35565185546875,
"loss": 0.02,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -18.506458282470703,
"rewards/margins": 15.537744522094727,
"rewards/real": -2.968712568283081,
"step": 2850
},
{
"epoch": 1.84,
"learning_rate": 2.1518686027136395e-07,
"logits/generated": -2.141085624694824,
"logits/real": -2.2068240642547607,
"logps/generated": -559.6201171875,
"logps/real": -355.288818359375,
"loss": 0.0092,
"rewards/accuracies": 1.0,
"rewards/generated": -17.508007049560547,
"rewards/margins": 13.753092765808105,
"rewards/real": -3.754911422729492,
"step": 2860
},
{
"epoch": 1.84,
"learning_rate": 2.1399666746012854e-07,
"logits/generated": -2.1633191108703613,
"logits/real": -2.1534600257873535,
"logps/generated": -557.6275024414062,
"logps/real": -355.95733642578125,
"loss": 0.0122,
"rewards/accuracies": 1.0,
"rewards/generated": -18.064023971557617,
"rewards/margins": 13.931376457214355,
"rewards/real": -4.132648468017578,
"step": 2870
},
{
"epoch": 1.85,
"learning_rate": 2.1280647464889313e-07,
"logits/generated": -2.146686315536499,
"logits/real": -2.154139280319214,
"logps/generated": -617.3681030273438,
"logps/real": -377.43853759765625,
"loss": 0.0077,
"rewards/accuracies": 1.0,
"rewards/generated": -20.22401237487793,
"rewards/margins": 16.329097747802734,
"rewards/real": -3.8949122428894043,
"step": 2880
},
{
"epoch": 1.86,
"learning_rate": 2.116162818376577e-07,
"logits/generated": -2.074739456176758,
"logits/real": -2.1095595359802246,
"logps/generated": -574.0628051757812,
"logps/real": -360.5137023925781,
"loss": 0.0033,
"rewards/accuracies": 1.0,
"rewards/generated": -20.636310577392578,
"rewards/margins": 17.622827529907227,
"rewards/real": -3.013484477996826,
"step": 2890
},
{
"epoch": 1.86,
"learning_rate": 2.1042608902642228e-07,
"logits/generated": -2.1206960678100586,
"logits/real": -2.20168399810791,
"logps/generated": -583.8810424804688,
"logps/real": -382.8518371582031,
"loss": 0.0146,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -20.312095642089844,
"rewards/margins": 15.806414604187012,
"rewards/real": -4.505680561065674,
"step": 2900
},
{
"epoch": 1.87,
"learning_rate": 2.0923589621518687e-07,
"logits/generated": -2.0229172706604004,
"logits/real": -2.0557923316955566,
"logps/generated": -597.6458740234375,
"logps/real": -395.81646728515625,
"loss": 0.0073,
"rewards/accuracies": 1.0,
"rewards/generated": -20.894927978515625,
"rewards/margins": 17.441539764404297,
"rewards/real": -3.453387498855591,
"step": 2910
},
{
"epoch": 1.88,
"learning_rate": 2.080457034039514e-07,
"logits/generated": -1.9580085277557373,
"logits/real": -2.1535487174987793,
"logps/generated": -578.9662475585938,
"logps/real": -369.46197509765625,
"loss": 0.0045,
"rewards/accuracies": 1.0,
"rewards/generated": -18.469118118286133,
"rewards/margins": 14.781881332397461,
"rewards/real": -3.687236785888672,
"step": 2920
},
{
"epoch": 1.88,
"learning_rate": 2.06855510592716e-07,
"logits/generated": -2.0475711822509766,
"logits/real": -2.0905933380126953,
"logps/generated": -579.7207641601562,
"logps/real": -362.5934143066406,
"loss": 0.0442,
"rewards/accuracies": 1.0,
"rewards/generated": -19.495128631591797,
"rewards/margins": 16.245441436767578,
"rewards/real": -3.249690294265747,
"step": 2930
},
{
"epoch": 1.89,
"learning_rate": 2.0566531778148059e-07,
"logits/generated": -1.9959481954574585,
"logits/real": -2.1353626251220703,
"logps/generated": -564.4955444335938,
"logps/real": -383.1506042480469,
"loss": 0.0164,
"rewards/accuracies": 1.0,
"rewards/generated": -18.533605575561523,
"rewards/margins": 16.09157943725586,
"rewards/real": -2.4420278072357178,
"step": 2940
},
{
"epoch": 1.9,
"learning_rate": 2.0447512497024515e-07,
"logits/generated": -2.084301233291626,
"logits/real": -2.0366158485412598,
"logps/generated": -518.3314208984375,
"logps/real": -350.7093505859375,
"loss": 0.0225,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.012195587158203,
"rewards/margins": 15.021069526672363,
"rewards/real": -2.9911255836486816,
"step": 2950
},
{
"epoch": 1.9,
"learning_rate": 2.0328493215900974e-07,
"logits/generated": -2.1363632678985596,
"logits/real": -2.122278928756714,
"logps/generated": -602.5611572265625,
"logps/real": -397.9182434082031,
"loss": 0.0054,
"rewards/accuracies": 1.0,
"rewards/generated": -20.253149032592773,
"rewards/margins": 17.09138298034668,
"rewards/real": -3.1617660522460938,
"step": 2960
},
{
"epoch": 1.91,
"learning_rate": 2.0209473934777433e-07,
"logits/generated": -2.134699821472168,
"logits/real": -2.08998441696167,
"logps/generated": -580.0136108398438,
"logps/real": -359.6705322265625,
"loss": 0.0042,
"rewards/accuracies": 1.0,
"rewards/generated": -19.094505310058594,
"rewards/margins": 15.466270446777344,
"rewards/real": -3.628235340118408,
"step": 2970
},
{
"epoch": 1.92,
"learning_rate": 2.0090454653653892e-07,
"logits/generated": -2.042788028717041,
"logits/real": -2.0056633949279785,
"logps/generated": -532.9696044921875,
"logps/real": -333.90618896484375,
"loss": 0.0087,
"rewards/accuracies": 1.0,
"rewards/generated": -18.65140151977539,
"rewards/margins": 15.834383010864258,
"rewards/real": -2.8170199394226074,
"step": 2980
},
{
"epoch": 1.92,
"learning_rate": 1.9971435372530348e-07,
"logits/generated": -2.0326991081237793,
"logits/real": -2.0445380210876465,
"logps/generated": -636.9520263671875,
"logps/real": -420.206787109375,
"loss": 0.0064,
"rewards/accuracies": 1.0,
"rewards/generated": -20.007139205932617,
"rewards/margins": 16.871540069580078,
"rewards/real": -3.1356008052825928,
"step": 2990
},
{
"epoch": 1.93,
"learning_rate": 1.9852416091406807e-07,
"logits/generated": -2.16467022895813,
"logits/real": -2.151834011077881,
"logps/generated": -608.9406127929688,
"logps/real": -386.32403564453125,
"loss": 0.0102,
"rewards/accuracies": 1.0,
"rewards/generated": -19.408123016357422,
"rewards/margins": 15.358880996704102,
"rewards/real": -4.049244403839111,
"step": 3000
},
{
"epoch": 1.93,
"learning_rate": 1.9733396810283266e-07,
"logits/generated": -1.994359016418457,
"logits/real": -2.083233594894409,
"logps/generated": -580.0289916992188,
"logps/real": -387.74285888671875,
"loss": 0.0055,
"rewards/accuracies": 1.0,
"rewards/generated": -19.273834228515625,
"rewards/margins": 16.140888214111328,
"rewards/real": -3.1329457759857178,
"step": 3010
},
{
"epoch": 1.94,
"learning_rate": 1.9614377529159722e-07,
"logits/generated": -1.9821665287017822,
"logits/real": -1.9590924978256226,
"logps/generated": -658.1160278320312,
"logps/real": -387.3288879394531,
"loss": 0.0084,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -21.883031845092773,
"rewards/margins": 18.182018280029297,
"rewards/real": -3.7010135650634766,
"step": 3020
},
{
"epoch": 1.95,
"learning_rate": 1.9495358248036181e-07,
"logits/generated": -1.8935340642929077,
"logits/real": -1.9233297109603882,
"logps/generated": -587.504638671875,
"logps/real": -379.6001281738281,
"loss": 0.0289,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -21.569808959960938,
"rewards/margins": 16.9621639251709,
"rewards/real": -4.607644081115723,
"step": 3030
},
{
"epoch": 1.95,
"learning_rate": 1.937633896691264e-07,
"logits/generated": -2.0384740829467773,
"logits/real": -2.0107827186584473,
"logps/generated": -560.0950927734375,
"logps/real": -348.64178466796875,
"loss": 0.0107,
"rewards/accuracies": 1.0,
"rewards/generated": -20.69436264038086,
"rewards/margins": 16.582056045532227,
"rewards/real": -4.112307548522949,
"step": 3040
},
{
"epoch": 1.96,
"learning_rate": 1.9257319685789097e-07,
"logits/generated": -1.8694576025009155,
"logits/real": -1.8919403553009033,
"logps/generated": -610.2615966796875,
"logps/real": -373.0062561035156,
"loss": 0.0144,
"rewards/accuracies": 1.0,
"rewards/generated": -21.5854434967041,
"rewards/margins": 17.94769287109375,
"rewards/real": -3.637749433517456,
"step": 3050
},
{
"epoch": 1.97,
"learning_rate": 1.9138300404665556e-07,
"logits/generated": -1.833876371383667,
"logits/real": -1.923029899597168,
"logps/generated": -625.2069091796875,
"logps/real": -445.4774475097656,
"loss": 0.0148,
"rewards/accuracies": 1.0,
"rewards/generated": -21.171947479248047,
"rewards/margins": 16.788434982299805,
"rewards/real": -4.383509635925293,
"step": 3060
},
{
"epoch": 1.97,
"learning_rate": 1.9019281123542015e-07,
"logits/generated": -1.8446261882781982,
"logits/real": -1.9809293746948242,
"logps/generated": -639.4974365234375,
"logps/real": -433.82843017578125,
"loss": 0.01,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -22.332645416259766,
"rewards/margins": 17.92720603942871,
"rewards/real": -4.405437469482422,
"step": 3070
},
{
"epoch": 1.98,
"learning_rate": 1.890026184241847e-07,
"logits/generated": -1.794547438621521,
"logits/real": -1.902682900428772,
"logps/generated": -686.3941650390625,
"logps/real": -412.7117614746094,
"loss": 0.0142,
"rewards/accuracies": 1.0,
"rewards/generated": -23.83676528930664,
"rewards/margins": 18.992544174194336,
"rewards/real": -4.844220161437988,
"step": 3080
},
{
"epoch": 1.99,
"learning_rate": 1.878124256129493e-07,
"logits/generated": -1.771178960800171,
"logits/real": -1.8597948551177979,
"logps/generated": -650.177490234375,
"logps/real": -385.32281494140625,
"loss": 0.0082,
"rewards/accuracies": 1.0,
"rewards/generated": -22.727323532104492,
"rewards/margins": 18.738880157470703,
"rewards/real": -3.9884445667266846,
"step": 3090
},
{
"epoch": 1.99,
"learning_rate": 1.866222328017139e-07,
"logits/generated": -1.7059656381607056,
"logits/real": -1.7673842906951904,
"logps/generated": -622.1861572265625,
"logps/real": -364.33526611328125,
"loss": 0.0049,
"rewards/accuracies": 1.0,
"rewards/generated": -22.290287017822266,
"rewards/margins": 17.137935638427734,
"rewards/real": -5.152352809906006,
"step": 3100
},
{
"epoch": 2.0,
"learning_rate": 1.8543203999047845e-07,
"logits/generated": -1.7726774215698242,
"logits/real": -1.9006750583648682,
"logps/generated": -613.3416748046875,
"logps/real": -358.4165954589844,
"loss": 0.0114,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -22.165300369262695,
"rewards/margins": 17.081605911254883,
"rewards/real": -5.08369255065918,
"step": 3110
},
{
"epoch": 2.01,
"learning_rate": 1.8424184717924304e-07,
"logits/generated": -1.7835719585418701,
"logits/real": -1.8426719903945923,
"logps/generated": -601.4617919921875,
"logps/real": -340.8309631347656,
"loss": 0.0035,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -22.10822868347168,
"rewards/margins": 17.62343978881836,
"rewards/real": -4.48478889465332,
"step": 3120
},
{
"epoch": 2.01,
"learning_rate": 1.830516543680076e-07,
"logits/generated": -1.864492416381836,
"logits/real": -1.931165099143982,
"logps/generated": -646.0354614257812,
"logps/real": -399.3186340332031,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -22.49277114868164,
"rewards/margins": 17.703113555908203,
"rewards/real": -4.789654731750488,
"step": 3130
},
{
"epoch": 2.02,
"learning_rate": 1.8186146155677217e-07,
"logits/generated": -1.6619055271148682,
"logits/real": -1.682189702987671,
"logps/generated": -651.1900634765625,
"logps/real": -430.13775634765625,
"loss": 0.0067,
"rewards/accuracies": 1.0,
"rewards/generated": -23.825260162353516,
"rewards/margins": 19.324203491210938,
"rewards/real": -4.501051902770996,
"step": 3140
},
{
"epoch": 2.02,
"learning_rate": 1.8067126874553676e-07,
"logits/generated": -1.8066043853759766,
"logits/real": -1.857184648513794,
"logps/generated": -644.6920166015625,
"logps/real": -422.68731689453125,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -23.160465240478516,
"rewards/margins": 18.2052001953125,
"rewards/real": -4.955265045166016,
"step": 3150
},
{
"epoch": 2.03,
"learning_rate": 1.7948107593430135e-07,
"logits/generated": -1.7574580907821655,
"logits/real": -1.6988914012908936,
"logps/generated": -635.5706176757812,
"logps/real": -414.3448791503906,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/generated": -22.139110565185547,
"rewards/margins": 17.725711822509766,
"rewards/real": -4.41339635848999,
"step": 3160
},
{
"epoch": 2.04,
"learning_rate": 1.782908831230659e-07,
"logits/generated": -1.7514142990112305,
"logits/real": -1.833495855331421,
"logps/generated": -635.5641479492188,
"logps/real": -391.35406494140625,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/generated": -24.829418182373047,
"rewards/margins": 19.86545753479004,
"rewards/real": -4.963961601257324,
"step": 3170
},
{
"epoch": 2.04,
"learning_rate": 1.771006903118305e-07,
"logits/generated": -1.8058671951293945,
"logits/real": -1.84176504611969,
"logps/generated": -596.8150634765625,
"logps/real": -392.7447814941406,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/generated": -23.030054092407227,
"rewards/margins": 17.652753829956055,
"rewards/real": -5.37730073928833,
"step": 3180
},
{
"epoch": 2.05,
"learning_rate": 1.759104975005951e-07,
"logits/generated": -1.7943785190582275,
"logits/real": -1.8919038772583008,
"logps/generated": -616.5109252929688,
"logps/real": -351.06903076171875,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/generated": -24.05794334411621,
"rewards/margins": 18.479902267456055,
"rewards/real": -5.578042984008789,
"step": 3190
},
{
"epoch": 2.06,
"learning_rate": 1.7472030468935965e-07,
"logits/generated": -1.7733243703842163,
"logits/real": -1.8298814296722412,
"logps/generated": -644.5895385742188,
"logps/real": -346.65716552734375,
"loss": 0.0013,
"rewards/accuracies": 1.0,
"rewards/generated": -23.519426345825195,
"rewards/margins": 19.296615600585938,
"rewards/real": -4.222809791564941,
"step": 3200
},
{
"epoch": 2.06,
"learning_rate": 1.7353011187812424e-07,
"logits/generated": -1.7913854122161865,
"logits/real": -1.7636489868164062,
"logps/generated": -630.2733154296875,
"logps/real": -346.322021484375,
"loss": 0.001,
"rewards/accuracies": 1.0,
"rewards/generated": -24.583568572998047,
"rewards/margins": 19.213102340698242,
"rewards/real": -5.3704681396484375,
"step": 3210
},
{
"epoch": 2.07,
"learning_rate": 1.7233991906688883e-07,
"logits/generated": -1.693394422531128,
"logits/real": -1.777193307876587,
"logps/generated": -647.3377685546875,
"logps/real": -437.0379333496094,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/generated": -24.571914672851562,
"rewards/margins": 18.28937530517578,
"rewards/real": -6.282541751861572,
"step": 3220
},
{
"epoch": 2.08,
"learning_rate": 1.7114972625565342e-07,
"logits/generated": -1.7938833236694336,
"logits/real": -1.8682388067245483,
"logps/generated": -612.8692016601562,
"logps/real": -373.411865234375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -23.837581634521484,
"rewards/margins": 18.990482330322266,
"rewards/real": -4.847100734710693,
"step": 3230
},
{
"epoch": 2.08,
"learning_rate": 1.6995953344441799e-07,
"logits/generated": -1.5872770547866821,
"logits/real": -1.749686598777771,
"logps/generated": -612.15576171875,
"logps/real": -354.6670837402344,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/generated": -24.107717514038086,
"rewards/margins": 19.196178436279297,
"rewards/real": -4.911539554595947,
"step": 3240
},
{
"epoch": 2.09,
"learning_rate": 1.6876934063318258e-07,
"logits/generated": -1.744523048400879,
"logits/real": -1.7055385112762451,
"logps/generated": -612.4163818359375,
"logps/real": -340.0469055175781,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -26.122411727905273,
"rewards/margins": 20.10819435119629,
"rewards/real": -6.014217376708984,
"step": 3250
},
{
"epoch": 2.1,
"learning_rate": 1.6757914782194717e-07,
"logits/generated": -1.761125922203064,
"logits/real": -1.713313102722168,
"logps/generated": -677.6751708984375,
"logps/real": -460.227294921875,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/generated": -24.481826782226562,
"rewards/margins": 20.24111557006836,
"rewards/real": -4.240714073181152,
"step": 3260
},
{
"epoch": 2.1,
"learning_rate": 1.6638895501071173e-07,
"logits/generated": -1.7685257196426392,
"logits/real": -1.7595218420028687,
"logps/generated": -670.4361572265625,
"logps/real": -387.3614807128906,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -25.423667907714844,
"rewards/margins": 20.145977020263672,
"rewards/real": -5.277690887451172,
"step": 3270
},
{
"epoch": 2.11,
"learning_rate": 1.6519876219947632e-07,
"logits/generated": -1.7515497207641602,
"logits/real": -1.8205455541610718,
"logps/generated": -609.548828125,
"logps/real": -398.30230712890625,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -24.230987548828125,
"rewards/margins": 19.289710998535156,
"rewards/real": -4.9412760734558105,
"step": 3280
},
{
"epoch": 2.11,
"learning_rate": 1.640085693882409e-07,
"logits/generated": -1.7568248510360718,
"logits/real": -1.7875878810882568,
"logps/generated": -670.6278076171875,
"logps/real": -403.04669189453125,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/generated": -25.942584991455078,
"rewards/margins": 19.860820770263672,
"rewards/real": -6.081762790679932,
"step": 3290
},
{
"epoch": 2.12,
"learning_rate": 1.6281837657700547e-07,
"logits/generated": -1.672739028930664,
"logits/real": -1.710694670677185,
"logps/generated": -709.2623291015625,
"logps/real": -427.17083740234375,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/generated": -27.278446197509766,
"rewards/margins": 20.85129737854004,
"rewards/real": -6.42714786529541,
"step": 3300
},
{
"epoch": 2.13,
"learning_rate": 1.6162818376577006e-07,
"logits/generated": -1.7365968227386475,
"logits/real": -1.8601267337799072,
"logps/generated": -647.3109130859375,
"logps/real": -408.20904541015625,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -24.648662567138672,
"rewards/margins": 18.514421463012695,
"rewards/real": -6.134242057800293,
"step": 3310
},
{
"epoch": 2.13,
"learning_rate": 1.6043799095453465e-07,
"logits/generated": -1.7646595239639282,
"logits/real": -1.7028201818466187,
"logps/generated": -615.2380981445312,
"logps/real": -392.93438720703125,
"loss": 0.0047,
"rewards/accuracies": 1.0,
"rewards/generated": -23.260175704956055,
"rewards/margins": 17.600215911865234,
"rewards/real": -5.659959316253662,
"step": 3320
},
{
"epoch": 2.14,
"learning_rate": 1.592477981432992e-07,
"logits/generated": -1.677546501159668,
"logits/real": -1.7684322595596313,
"logps/generated": -675.1948852539062,
"logps/real": -372.6117248535156,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/generated": -28.304229736328125,
"rewards/margins": 22.81051254272461,
"rewards/real": -5.493716239929199,
"step": 3330
},
{
"epoch": 2.15,
"learning_rate": 1.5805760533206378e-07,
"logits/generated": -1.6419671773910522,
"logits/real": -1.754093885421753,
"logps/generated": -710.6311645507812,
"logps/real": -412.677001953125,
"loss": 0.0029,
"rewards/accuracies": 1.0,
"rewards/generated": -26.5703125,
"rewards/margins": 20.43376922607422,
"rewards/real": -6.136545658111572,
"step": 3340
},
{
"epoch": 2.15,
"learning_rate": 1.5686741252082837e-07,
"logits/generated": -1.6100307703018188,
"logits/real": -1.7829450368881226,
"logps/generated": -626.542236328125,
"logps/real": -390.32904052734375,
"loss": 0.0039,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -22.756441116333008,
"rewards/margins": 18.904939651489258,
"rewards/real": -3.85149884223938,
"step": 3350
},
{
"epoch": 2.16,
"learning_rate": 1.5567721970959293e-07,
"logits/generated": -1.7627454996109009,
"logits/real": -1.9145416021347046,
"logps/generated": -677.59765625,
"logps/real": -411.33648681640625,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/generated": -24.147655487060547,
"rewards/margins": 19.63693618774414,
"rewards/real": -4.51071834564209,
"step": 3360
},
{
"epoch": 2.17,
"learning_rate": 1.5448702689835752e-07,
"logits/generated": -1.6949329376220703,
"logits/real": -1.7547237873077393,
"logps/generated": -625.1910400390625,
"logps/real": -367.7977600097656,
"loss": 0.0029,
"rewards/accuracies": 1.0,
"rewards/generated": -23.851118087768555,
"rewards/margins": 19.71878433227539,
"rewards/real": -4.132336616516113,
"step": 3370
},
{
"epoch": 2.17,
"learning_rate": 1.532968340871221e-07,
"logits/generated": -1.744741439819336,
"logits/real": -1.664894700050354,
"logps/generated": -640.474853515625,
"logps/real": -399.55108642578125,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -23.19083023071289,
"rewards/margins": 18.417896270751953,
"rewards/real": -4.772933006286621,
"step": 3380
},
{
"epoch": 2.18,
"learning_rate": 1.5210664127588667e-07,
"logits/generated": -1.662096381187439,
"logits/real": -1.8298946619033813,
"logps/generated": -631.4981689453125,
"logps/real": -397.19586181640625,
"loss": 0.0023,
"rewards/accuracies": 1.0,
"rewards/generated": -23.526195526123047,
"rewards/margins": 19.6595516204834,
"rewards/real": -3.866642475128174,
"step": 3390
},
{
"epoch": 2.19,
"learning_rate": 1.5091644846465126e-07,
"logits/generated": -1.7289931774139404,
"logits/real": -1.8209636211395264,
"logps/generated": -650.0264892578125,
"logps/real": -391.0191345214844,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/generated": -22.553695678710938,
"rewards/margins": 18.257787704467773,
"rewards/real": -4.295907974243164,
"step": 3400
},
{
"epoch": 2.19,
"learning_rate": 1.4972625565341585e-07,
"logits/generated": -1.563467025756836,
"logits/real": -1.7469037771224976,
"logps/generated": -654.3523559570312,
"logps/real": -378.0865783691406,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/generated": -25.712825775146484,
"rewards/margins": 20.601755142211914,
"rewards/real": -5.1110734939575195,
"step": 3410
},
{
"epoch": 2.2,
"learning_rate": 1.4853606284218042e-07,
"logits/generated": -1.7174959182739258,
"logits/real": -1.8672186136245728,
"logps/generated": -646.060791015625,
"logps/real": -402.63372802734375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -26.062244415283203,
"rewards/margins": 20.159557342529297,
"rewards/real": -5.902686595916748,
"step": 3420
},
{
"epoch": 2.2,
"learning_rate": 1.47345870030945e-07,
"logits/generated": -1.7195484638214111,
"logits/real": -1.7659461498260498,
"logps/generated": -670.35693359375,
"logps/real": -430.11724853515625,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -25.210329055786133,
"rewards/margins": 19.280933380126953,
"rewards/real": -5.92939567565918,
"step": 3430
},
{
"epoch": 2.21,
"learning_rate": 1.461556772197096e-07,
"logits/generated": -1.6868633031845093,
"logits/real": -1.7503039836883545,
"logps/generated": -666.7827758789062,
"logps/real": -404.4327087402344,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -24.964031219482422,
"rewards/margins": 18.93131446838379,
"rewards/real": -6.03271484375,
"step": 3440
},
{
"epoch": 2.22,
"learning_rate": 1.4496548440847416e-07,
"logits/generated": -1.5785818099975586,
"logits/real": -1.695892572402954,
"logps/generated": -671.3966674804688,
"logps/real": -425.050537109375,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -28.694400787353516,
"rewards/margins": 22.770051956176758,
"rewards/real": -5.924350261688232,
"step": 3450
},
{
"epoch": 2.22,
"learning_rate": 1.4377529159723875e-07,
"logits/generated": -1.7265466451644897,
"logits/real": -1.7282949686050415,
"logps/generated": -644.9312744140625,
"logps/real": -385.41571044921875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -25.46233558654785,
"rewards/margins": 20.13895034790039,
"rewards/real": -5.323385238647461,
"step": 3460
},
{
"epoch": 2.23,
"learning_rate": 1.4258509878600334e-07,
"logits/generated": -1.606693983078003,
"logits/real": -1.7219680547714233,
"logps/generated": -651.696533203125,
"logps/real": -405.09185791015625,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -25.30702781677246,
"rewards/margins": 19.77613067626953,
"rewards/real": -5.53089714050293,
"step": 3470
},
{
"epoch": 2.24,
"learning_rate": 1.4139490597476793e-07,
"logits/generated": -1.691404104232788,
"logits/real": -1.7261533737182617,
"logps/generated": -705.8663330078125,
"logps/real": -424.4842224121094,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -28.195613861083984,
"rewards/margins": 22.152070999145508,
"rewards/real": -6.043543815612793,
"step": 3480
},
{
"epoch": 2.24,
"learning_rate": 1.402047131635325e-07,
"logits/generated": -1.5611227750778198,
"logits/real": -1.7058401107788086,
"logps/generated": -670.8802490234375,
"logps/real": -413.3358459472656,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -26.489639282226562,
"rewards/margins": 20.785070419311523,
"rewards/real": -5.704569339752197,
"step": 3490
},
{
"epoch": 2.25,
"learning_rate": 1.3901452035229708e-07,
"logits/generated": -1.512269377708435,
"logits/real": -1.68305242061615,
"logps/generated": -571.3214111328125,
"logps/real": -344.5645751953125,
"loss": 0.0024,
"rewards/accuracies": 1.0,
"rewards/generated": -24.5626277923584,
"rewards/margins": 18.92730140686035,
"rewards/real": -5.6353278160095215,
"step": 3500
},
{
"epoch": 2.26,
"learning_rate": 1.3782432754106167e-07,
"logits/generated": -1.530753493309021,
"logits/real": -1.6607004404067993,
"logps/generated": -622.2992553710938,
"logps/real": -382.42303466796875,
"loss": 0.0025,
"rewards/accuracies": 1.0,
"rewards/generated": -25.785675048828125,
"rewards/margins": 19.25429344177246,
"rewards/real": -6.531381130218506,
"step": 3510
},
{
"epoch": 2.26,
"learning_rate": 1.3663413472982623e-07,
"logits/generated": -1.732834815979004,
"logits/real": -1.736498236656189,
"logps/generated": -631.5408935546875,
"logps/real": -347.4884033203125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -26.657485961914062,
"rewards/margins": 20.199687957763672,
"rewards/real": -6.457800388336182,
"step": 3520
},
{
"epoch": 2.27,
"learning_rate": 1.354439419185908e-07,
"logits/generated": -1.5467314720153809,
"logits/real": -1.6046216487884521,
"logps/generated": -634.9423828125,
"logps/real": -400.60052490234375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -27.31796646118164,
"rewards/margins": 20.177886962890625,
"rewards/real": -7.140076637268066,
"step": 3530
},
{
"epoch": 2.28,
"learning_rate": 1.3425374910735539e-07,
"logits/generated": -1.599615216255188,
"logits/real": -1.7260444164276123,
"logps/generated": -686.1055908203125,
"logps/real": -475.2167053222656,
"loss": 0.0023,
"rewards/accuracies": 1.0,
"rewards/generated": -26.89980697631836,
"rewards/margins": 20.732288360595703,
"rewards/real": -6.167518615722656,
"step": 3540
},
{
"epoch": 2.28,
"learning_rate": 1.3306355629611995e-07,
"logits/generated": -1.519061803817749,
"logits/real": -1.6003974676132202,
"logps/generated": -736.18408203125,
"logps/real": -413.788330078125,
"loss": 0.0045,
"rewards/accuracies": 1.0,
"rewards/generated": -29.17459487915039,
"rewards/margins": 22.599679946899414,
"rewards/real": -6.574913024902344,
"step": 3550
},
{
"epoch": 2.29,
"learning_rate": 1.3187336348488454e-07,
"logits/generated": -1.5416706800460815,
"logits/real": -1.5915443897247314,
"logps/generated": -693.075927734375,
"logps/real": -461.91339111328125,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/generated": -26.290576934814453,
"rewards/margins": 20.055639266967773,
"rewards/real": -6.234936237335205,
"step": 3560
},
{
"epoch": 2.29,
"learning_rate": 1.3068317067364913e-07,
"logits/generated": -1.521410584449768,
"logits/real": -1.6385908126831055,
"logps/generated": -643.1195068359375,
"logps/real": -369.3460998535156,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/generated": -26.421768188476562,
"rewards/margins": 20.350460052490234,
"rewards/real": -6.071305274963379,
"step": 3570
},
{
"epoch": 2.3,
"learning_rate": 1.294929778624137e-07,
"logits/generated": -1.5358374118804932,
"logits/real": -1.6597099304199219,
"logps/generated": -689.6113891601562,
"logps/real": -466.4942321777344,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -24.892501831054688,
"rewards/margins": 19.115379333496094,
"rewards/real": -5.777121543884277,
"step": 3580
},
{
"epoch": 2.31,
"learning_rate": 1.2830278505117828e-07,
"logits/generated": -1.406715989112854,
"logits/real": -1.4844402074813843,
"logps/generated": -658.787353515625,
"logps/real": -436.83636474609375,
"loss": 0.0054,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -28.258020401000977,
"rewards/margins": 21.475244522094727,
"rewards/real": -6.782778739929199,
"step": 3590
},
{
"epoch": 2.31,
"learning_rate": 1.2711259223994287e-07,
"logits/generated": -1.3396766185760498,
"logits/real": -1.4809544086456299,
"logps/generated": -681.5261840820312,
"logps/real": -404.4817199707031,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/generated": -27.8902645111084,
"rewards/margins": 21.514251708984375,
"rewards/real": -6.376011848449707,
"step": 3600
},
{
"epoch": 2.32,
"learning_rate": 1.2592239942870743e-07,
"logits/generated": -1.3948055505752563,
"logits/real": -1.5513432025909424,
"logps/generated": -698.7779541015625,
"logps/real": -433.32244873046875,
"loss": 0.0012,
"rewards/accuracies": 1.0,
"rewards/generated": -27.659128189086914,
"rewards/margins": 20.854068756103516,
"rewards/real": -6.805060386657715,
"step": 3610
},
{
"epoch": 2.33,
"learning_rate": 1.2473220661747202e-07,
"logits/generated": -1.5047938823699951,
"logits/real": -1.5021181106567383,
"logps/generated": -733.7626342773438,
"logps/real": -459.6304626464844,
"loss": 0.0024,
"rewards/accuracies": 1.0,
"rewards/generated": -30.57221031188965,
"rewards/margins": 21.513500213623047,
"rewards/real": -9.058713912963867,
"step": 3620
},
{
"epoch": 2.33,
"learning_rate": 1.2354201380623661e-07,
"logits/generated": -1.440640926361084,
"logits/real": -1.4846980571746826,
"logps/generated": -692.8338623046875,
"logps/real": -438.29180908203125,
"loss": 0.0069,
"rewards/accuracies": 1.0,
"rewards/generated": -28.915645599365234,
"rewards/margins": 19.977619171142578,
"rewards/real": -8.93802547454834,
"step": 3630
},
{
"epoch": 2.34,
"learning_rate": 1.2235182099500118e-07,
"logits/generated": -1.4438087940216064,
"logits/real": -1.5675140619277954,
"logps/generated": -683.9814453125,
"logps/real": -481.3240661621094,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -28.231365203857422,
"rewards/margins": 20.71581268310547,
"rewards/real": -7.515552520751953,
"step": 3640
},
{
"epoch": 2.35,
"learning_rate": 1.2116162818376577e-07,
"logits/generated": -1.4532592296600342,
"logits/real": -1.466347336769104,
"logps/generated": -653.7462158203125,
"logps/real": -410.3941955566406,
"loss": 0.0028,
"rewards/accuracies": 1.0,
"rewards/generated": -27.256484985351562,
"rewards/margins": 19.569355010986328,
"rewards/real": -7.687130928039551,
"step": 3650
},
{
"epoch": 2.35,
"learning_rate": 1.1997143537253036e-07,
"logits/generated": -1.519970178604126,
"logits/real": -1.6325457096099854,
"logps/generated": -666.7601318359375,
"logps/real": -440.3809509277344,
"loss": 0.0048,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -28.8579044342041,
"rewards/margins": 21.10441780090332,
"rewards/real": -7.753486633300781,
"step": 3660
},
{
"epoch": 2.36,
"learning_rate": 1.1878124256129493e-07,
"logits/generated": -1.404447317123413,
"logits/real": -1.4850072860717773,
"logps/generated": -646.6796875,
"logps/real": -382.3814697265625,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/generated": -27.619770050048828,
"rewards/margins": 19.931453704833984,
"rewards/real": -7.688315391540527,
"step": 3670
},
{
"epoch": 2.37,
"learning_rate": 1.175910497500595e-07,
"logits/generated": -1.465595006942749,
"logits/real": -1.5086153745651245,
"logps/generated": -713.1724243164062,
"logps/real": -460.9185485839844,
"loss": 0.0027,
"rewards/accuracies": 1.0,
"rewards/generated": -30.117782592773438,
"rewards/margins": 21.637718200683594,
"rewards/real": -8.480066299438477,
"step": 3680
},
{
"epoch": 2.37,
"learning_rate": 1.1640085693882409e-07,
"logits/generated": -1.3850997686386108,
"logits/real": -1.4538679122924805,
"logps/generated": -767.3414306640625,
"logps/real": -462.62030029296875,
"loss": 0.0023,
"rewards/accuracies": 1.0,
"rewards/generated": -33.52153396606445,
"rewards/margins": 23.91811180114746,
"rewards/real": -9.603418350219727,
"step": 3690
},
{
"epoch": 2.38,
"learning_rate": 1.1521066412758866e-07,
"logits/generated": -1.4143835306167603,
"logits/real": -1.5478150844573975,
"logps/generated": -652.1123657226562,
"logps/real": -392.4671630859375,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/generated": -30.432504653930664,
"rewards/margins": 21.238384246826172,
"rewards/real": -9.194117546081543,
"step": 3700
},
{
"epoch": 2.38,
"learning_rate": 1.1402047131635324e-07,
"logits/generated": -1.4064255952835083,
"logits/real": -1.4616397619247437,
"logps/generated": -666.3732299804688,
"logps/real": -342.7879333496094,
"loss": 0.0016,
"rewards/accuracies": 1.0,
"rewards/generated": -29.1993465423584,
"rewards/margins": 22.13344955444336,
"rewards/real": -7.0659003257751465,
"step": 3710
},
{
"epoch": 2.39,
"learning_rate": 1.1283027850511783e-07,
"logits/generated": -1.3749884366989136,
"logits/real": -1.4327641725540161,
"logps/generated": -699.9730224609375,
"logps/real": -428.4892578125,
"loss": 0.0045,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -31.05487060546875,
"rewards/margins": 22.448055267333984,
"rewards/real": -8.606815338134766,
"step": 3720
},
{
"epoch": 2.4,
"learning_rate": 1.116400856938824e-07,
"logits/generated": -1.3594131469726562,
"logits/real": -1.515339732170105,
"logps/generated": -683.3230590820312,
"logps/real": -435.6929626464844,
"loss": 0.0025,
"rewards/accuracies": 1.0,
"rewards/generated": -30.349924087524414,
"rewards/margins": 22.398181915283203,
"rewards/real": -7.951746940612793,
"step": 3730
},
{
"epoch": 2.4,
"learning_rate": 1.1044989288264698e-07,
"logits/generated": -1.3991297483444214,
"logits/real": -1.5862958431243896,
"logps/generated": -739.5543212890625,
"logps/real": -410.45501708984375,
"loss": 0.0043,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -30.497798919677734,
"rewards/margins": 21.733530044555664,
"rewards/real": -8.764264106750488,
"step": 3740
},
{
"epoch": 2.41,
"learning_rate": 1.0925970007141157e-07,
"logits/generated": -1.4209661483764648,
"logits/real": -1.554810643196106,
"logps/generated": -690.9508056640625,
"logps/real": -424.572265625,
"loss": 0.0034,
"rewards/accuracies": 1.0,
"rewards/generated": -28.3098201751709,
"rewards/margins": 19.752222061157227,
"rewards/real": -8.557598114013672,
"step": 3750
},
{
"epoch": 2.42,
"learning_rate": 1.0806950726017615e-07,
"logits/generated": -1.4447872638702393,
"logits/real": -1.6798250675201416,
"logps/generated": -628.6717529296875,
"logps/real": -394.5265808105469,
"loss": 0.0039,
"rewards/accuracies": 1.0,
"rewards/generated": -28.025936126708984,
"rewards/margins": 20.235803604125977,
"rewards/real": -7.790134429931641,
"step": 3760
},
{
"epoch": 2.42,
"learning_rate": 1.0687931444894072e-07,
"logits/generated": -1.532257318496704,
"logits/real": -1.6047290563583374,
"logps/generated": -702.6226806640625,
"logps/real": -396.7612609863281,
"loss": 0.0028,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -30.113338470458984,
"rewards/margins": 22.376953125,
"rewards/real": -7.73638916015625,
"step": 3770
},
{
"epoch": 2.43,
"learning_rate": 1.056891216377053e-07,
"logits/generated": -1.4834333658218384,
"logits/real": -1.5966551303863525,
"logps/generated": -713.3619995117188,
"logps/real": -440.79095458984375,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/generated": -29.308202743530273,
"rewards/margins": 21.465810775756836,
"rewards/real": -7.842390537261963,
"step": 3780
},
{
"epoch": 2.44,
"learning_rate": 1.0449892882646988e-07,
"logits/generated": -1.524183988571167,
"logits/real": -1.608907699584961,
"logps/generated": -710.7420654296875,
"logps/real": -489.75665283203125,
"loss": 0.0028,
"rewards/accuracies": 1.0,
"rewards/generated": -29.5823917388916,
"rewards/margins": 21.41897201538086,
"rewards/real": -8.163420677185059,
"step": 3790
},
{
"epoch": 2.44,
"learning_rate": 1.0330873601523445e-07,
"logits/generated": -1.4193501472473145,
"logits/real": -1.5115816593170166,
"logps/generated": -760.3106689453125,
"logps/real": -442.89898681640625,
"loss": 0.0026,
"rewards/accuracies": 1.0,
"rewards/generated": -30.945148468017578,
"rewards/margins": 22.820213317871094,
"rewards/real": -8.124935150146484,
"step": 3800
},
{
"epoch": 2.45,
"learning_rate": 1.0211854320399904e-07,
"logits/generated": -1.339179277420044,
"logits/real": -1.4539612531661987,
"logps/generated": -684.4679565429688,
"logps/real": -414.834716796875,
"loss": 0.0023,
"rewards/accuracies": 1.0,
"rewards/generated": -30.45599937438965,
"rewards/margins": 22.370563507080078,
"rewards/real": -8.085436820983887,
"step": 3810
},
{
"epoch": 2.46,
"learning_rate": 1.0092835039276362e-07,
"logits/generated": -1.4860432147979736,
"logits/real": -1.602423071861267,
"logps/generated": -673.7472534179688,
"logps/real": -382.69085693359375,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/generated": -30.125503540039062,
"rewards/margins": 23.682289123535156,
"rewards/real": -6.443214416503906,
"step": 3820
},
{
"epoch": 2.46,
"learning_rate": 9.973815758152821e-08,
"logits/generated": -1.5302735567092896,
"logits/real": -1.5981000661849976,
"logps/generated": -623.1529541015625,
"logps/real": -385.8594970703125,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -27.0240478515625,
"rewards/margins": 20.35898208618164,
"rewards/real": -6.665063381195068,
"step": 3830
},
{
"epoch": 2.47,
"learning_rate": 9.854796477029279e-08,
"logits/generated": -1.557305932044983,
"logits/real": -1.6370735168457031,
"logps/generated": -734.8917846679688,
"logps/real": -417.2500915527344,
"loss": 0.0014,
"rewards/accuracies": 1.0,
"rewards/generated": -30.936962127685547,
"rewards/margins": 23.65032386779785,
"rewards/real": -7.286639213562012,
"step": 3840
},
{
"epoch": 2.47,
"learning_rate": 9.735777195905736e-08,
"logits/generated": -1.5963976383209229,
"logits/real": -1.646400809288025,
"logps/generated": -621.7203979492188,
"logps/real": -350.34271240234375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -27.403676986694336,
"rewards/margins": 20.56957244873047,
"rewards/real": -6.834105491638184,
"step": 3850
},
{
"epoch": 2.48,
"learning_rate": 9.616757914782195e-08,
"logits/generated": -1.6388921737670898,
"logits/real": -1.738226294517517,
"logps/generated": -675.3317260742188,
"logps/real": -426.2586975097656,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -27.32644271850586,
"rewards/margins": 20.16423797607422,
"rewards/real": -7.162204742431641,
"step": 3860
},
{
"epoch": 2.49,
"learning_rate": 9.497738633658653e-08,
"logits/generated": -1.472826600074768,
"logits/real": -1.6667178869247437,
"logps/generated": -635.9212036132812,
"logps/real": -377.46685791015625,
"loss": 0.0078,
"rewards/accuracies": 1.0,
"rewards/generated": -27.496978759765625,
"rewards/margins": 20.11826515197754,
"rewards/real": -7.3787126541137695,
"step": 3870
},
{
"epoch": 2.49,
"learning_rate": 9.378719352535109e-08,
"logits/generated": -1.475178837776184,
"logits/real": -1.5875985622406006,
"logps/generated": -704.7142944335938,
"logps/real": -400.84521484375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -29.746139526367188,
"rewards/margins": 20.98758316040039,
"rewards/real": -8.758556365966797,
"step": 3880
},
{
"epoch": 2.5,
"learning_rate": 9.259700071411568e-08,
"logits/generated": -1.5485643148422241,
"logits/real": -1.5439013242721558,
"logps/generated": -741.1964111328125,
"logps/real": -408.63330078125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -30.4178409576416,
"rewards/margins": 23.00247573852539,
"rewards/real": -7.415367126464844,
"step": 3890
},
{
"epoch": 2.51,
"learning_rate": 9.140680790288026e-08,
"logits/generated": -1.4215214252471924,
"logits/real": -1.5626184940338135,
"logps/generated": -667.1637573242188,
"logps/real": -382.75238037109375,
"loss": 0.007,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -29.500041961669922,
"rewards/margins": 21.99424934387207,
"rewards/real": -7.505797386169434,
"step": 3900
},
{
"epoch": 2.51,
"learning_rate": 9.021661509164484e-08,
"logits/generated": -1.4949233531951904,
"logits/real": -1.5658118724822998,
"logps/generated": -680.2150268554688,
"logps/real": -470.9717712402344,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/generated": -28.54837989807129,
"rewards/margins": 21.074321746826172,
"rewards/real": -7.474058628082275,
"step": 3910
},
{
"epoch": 2.52,
"learning_rate": 8.902642228040942e-08,
"logits/generated": -1.4464380741119385,
"logits/real": -1.5153313875198364,
"logps/generated": -716.2271728515625,
"logps/real": -413.4552307128906,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -29.981014251708984,
"rewards/margins": 22.682100296020508,
"rewards/real": -7.298914909362793,
"step": 3920
},
{
"epoch": 2.53,
"learning_rate": 8.7836229469174e-08,
"logits/generated": -1.4447425603866577,
"logits/real": -1.56985342502594,
"logps/generated": -757.6671142578125,
"logps/real": -408.1660461425781,
"loss": 0.0013,
"rewards/accuracies": 1.0,
"rewards/generated": -32.84424591064453,
"rewards/margins": 24.225845336914062,
"rewards/real": -8.618400573730469,
"step": 3930
},
{
"epoch": 2.53,
"learning_rate": 8.664603665793858e-08,
"logits/generated": -1.6285253763198853,
"logits/real": -1.7806179523468018,
"logps/generated": -676.8280029296875,
"logps/real": -428.38995361328125,
"loss": 0.0018,
"rewards/accuracies": 1.0,
"rewards/generated": -27.158214569091797,
"rewards/margins": 20.443370819091797,
"rewards/real": -6.714838981628418,
"step": 3940
},
{
"epoch": 2.54,
"learning_rate": 8.545584384670317e-08,
"logits/generated": -1.581805944442749,
"logits/real": -1.6949933767318726,
"logps/generated": -665.0941162109375,
"logps/real": -385.68133544921875,
"loss": 0.0063,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -28.212072372436523,
"rewards/margins": 21.672542572021484,
"rewards/real": -6.539525508880615,
"step": 3950
},
{
"epoch": 2.54,
"learning_rate": 8.426565103546774e-08,
"logits/generated": -1.4136435985565186,
"logits/real": -1.5828819274902344,
"logps/generated": -684.5712280273438,
"logps/real": -426.00726318359375,
"loss": 0.0024,
"rewards/accuracies": 1.0,
"rewards/generated": -27.231287002563477,
"rewards/margins": 20.253149032592773,
"rewards/real": -6.978137016296387,
"step": 3960
},
{
"epoch": 2.55,
"learning_rate": 8.307545822423233e-08,
"logits/generated": -1.4507461786270142,
"logits/real": -1.5602095127105713,
"logps/generated": -665.6134033203125,
"logps/real": -367.7745361328125,
"loss": 0.0023,
"rewards/accuracies": 1.0,
"rewards/generated": -27.6043701171875,
"rewards/margins": 20.994644165039062,
"rewards/real": -6.6097259521484375,
"step": 3970
},
{
"epoch": 2.56,
"learning_rate": 8.18852654129969e-08,
"logits/generated": -1.6215112209320068,
"logits/real": -1.6469926834106445,
"logps/generated": -666.5162963867188,
"logps/real": -402.77227783203125,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -27.933940887451172,
"rewards/margins": 20.598087310791016,
"rewards/real": -7.335852146148682,
"step": 3980
},
{
"epoch": 2.56,
"learning_rate": 8.069507260176147e-08,
"logits/generated": -1.5119495391845703,
"logits/real": -1.6007074117660522,
"logps/generated": -637.7647094726562,
"logps/real": -355.0984802246094,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -27.141719818115234,
"rewards/margins": 20.317394256591797,
"rewards/real": -6.8243231773376465,
"step": 3990
},
{
"epoch": 2.57,
"learning_rate": 7.950487979052606e-08,
"logits/generated": -1.5609657764434814,
"logits/real": -1.6627038717269897,
"logps/generated": -708.6666259765625,
"logps/real": -415.657470703125,
"loss": 0.0028,
"rewards/accuracies": 1.0,
"rewards/generated": -30.411911010742188,
"rewards/margins": 22.86978530883789,
"rewards/real": -7.542126655578613,
"step": 4000
},
{
"epoch": 2.58,
"learning_rate": 7.831468697929064e-08,
"logits/generated": -1.4143073558807373,
"logits/real": -1.4475321769714355,
"logps/generated": -650.6522216796875,
"logps/real": -392.54345703125,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/generated": -25.745223999023438,
"rewards/margins": 20.270931243896484,
"rewards/real": -5.474294185638428,
"step": 4010
},
{
"epoch": 2.58,
"learning_rate": 7.712449416805522e-08,
"logits/generated": -1.4251785278320312,
"logits/real": -1.529900074005127,
"logps/generated": -684.9102172851562,
"logps/real": -411.81207275390625,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -26.357402801513672,
"rewards/margins": 19.91077995300293,
"rewards/real": -6.4466233253479,
"step": 4020
},
{
"epoch": 2.59,
"learning_rate": 7.59343013568198e-08,
"logits/generated": -1.4817150831222534,
"logits/real": -1.5895212888717651,
"logps/generated": -647.0018310546875,
"logps/real": -350.5855407714844,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -29.597564697265625,
"rewards/margins": 23.217004776000977,
"rewards/real": -6.38055944442749,
"step": 4030
},
{
"epoch": 2.6,
"learning_rate": 7.474410854558438e-08,
"logits/generated": -1.5963512659072876,
"logits/real": -1.637025237083435,
"logps/generated": -596.042724609375,
"logps/real": -327.88970947265625,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -26.45895767211914,
"rewards/margins": 19.825878143310547,
"rewards/real": -6.633078098297119,
"step": 4040
},
{
"epoch": 2.6,
"learning_rate": 7.355391573434896e-08,
"logits/generated": -1.638891577720642,
"logits/real": -1.7146186828613281,
"logps/generated": -687.0392456054688,
"logps/real": -365.47528076171875,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -27.7447509765625,
"rewards/margins": 20.9066219329834,
"rewards/real": -6.838125705718994,
"step": 4050
},
{
"epoch": 2.61,
"learning_rate": 7.236372292311355e-08,
"logits/generated": -1.4594347476959229,
"logits/real": -1.6632425785064697,
"logps/generated": -696.3617553710938,
"logps/real": -408.8171081542969,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -27.696395874023438,
"rewards/margins": 20.56673812866211,
"rewards/real": -7.129660129547119,
"step": 4060
},
{
"epoch": 2.62,
"learning_rate": 7.117353011187813e-08,
"logits/generated": -1.4063997268676758,
"logits/real": -1.4948168992996216,
"logps/generated": -661.9642333984375,
"logps/real": -434.97442626953125,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -25.784564971923828,
"rewards/margins": 17.620466232299805,
"rewards/real": -8.164094924926758,
"step": 4070
},
{
"epoch": 2.62,
"learning_rate": 6.998333730064269e-08,
"logits/generated": -1.438319444656372,
"logits/real": -1.5320520401000977,
"logps/generated": -679.9823608398438,
"logps/real": -425.42413330078125,
"loss": 0.0023,
"rewards/accuracies": 1.0,
"rewards/generated": -28.671300888061523,
"rewards/margins": 21.72645378112793,
"rewards/real": -6.944846153259277,
"step": 4080
},
{
"epoch": 2.63,
"learning_rate": 6.879314448940728e-08,
"logits/generated": -1.365595817565918,
"logits/real": -1.505392074584961,
"logps/generated": -624.4268798828125,
"logps/real": -387.62554931640625,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -27.39801597595215,
"rewards/margins": 20.606674194335938,
"rewards/real": -6.791341304779053,
"step": 4090
},
{
"epoch": 2.63,
"learning_rate": 6.760295167817185e-08,
"logits/generated": -1.6268279552459717,
"logits/real": -1.6268571615219116,
"logps/generated": -725.8529052734375,
"logps/real": -467.90618896484375,
"loss": 0.0035,
"rewards/accuracies": 1.0,
"rewards/generated": -26.912384033203125,
"rewards/margins": 20.454193115234375,
"rewards/real": -6.45819616317749,
"step": 4100
},
{
"epoch": 2.64,
"learning_rate": 6.641275886693644e-08,
"logits/generated": -1.4818575382232666,
"logits/real": -1.5495567321777344,
"logps/generated": -605.5071411132812,
"logps/real": -372.8101501464844,
"loss": 0.0027,
"rewards/accuracies": 1.0,
"rewards/generated": -26.420181274414062,
"rewards/margins": 19.029685974121094,
"rewards/real": -7.390494346618652,
"step": 4110
},
{
"epoch": 2.65,
"learning_rate": 6.522256605570102e-08,
"logits/generated": -1.5707738399505615,
"logits/real": -1.6774705648422241,
"logps/generated": -636.1964111328125,
"logps/real": -391.5029602050781,
"loss": 0.0087,
"rewards/accuracies": 1.0,
"rewards/generated": -27.28677749633789,
"rewards/margins": 21.367889404296875,
"rewards/real": -5.918887138366699,
"step": 4120
},
{
"epoch": 2.65,
"learning_rate": 6.40323732444656e-08,
"logits/generated": -1.545809030532837,
"logits/real": -1.5913245677947998,
"logps/generated": -630.56640625,
"logps/real": -422.04205322265625,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/generated": -24.0881404876709,
"rewards/margins": 18.995553970336914,
"rewards/real": -5.092586040496826,
"step": 4130
},
{
"epoch": 2.66,
"learning_rate": 6.284218043323019e-08,
"logits/generated": -1.59109365940094,
"logits/real": -1.6737741231918335,
"logps/generated": -642.7061157226562,
"logps/real": -410.7779235839844,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -25.49496841430664,
"rewards/margins": 19.26400375366211,
"rewards/real": -6.230964660644531,
"step": 4140
},
{
"epoch": 2.67,
"learning_rate": 6.165198762199476e-08,
"logits/generated": -1.633514404296875,
"logits/real": -1.619410514831543,
"logps/generated": -639.6582641601562,
"logps/real": -386.4176330566406,
"loss": 0.001,
"rewards/accuracies": 1.0,
"rewards/generated": -24.464160919189453,
"rewards/margins": 18.57771110534668,
"rewards/real": -5.886451721191406,
"step": 4150
},
{
"epoch": 2.67,
"learning_rate": 6.046179481075934e-08,
"logits/generated": -1.4455702304840088,
"logits/real": -1.7037875652313232,
"logps/generated": -664.431640625,
"logps/real": -400.7237548828125,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -25.038738250732422,
"rewards/margins": 19.530742645263672,
"rewards/real": -5.507995128631592,
"step": 4160
},
{
"epoch": 2.68,
"learning_rate": 5.9271601999523916e-08,
"logits/generated": -1.495924949645996,
"logits/real": -1.673275351524353,
"logps/generated": -645.5086669921875,
"logps/real": -383.1039123535156,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/generated": -25.089937210083008,
"rewards/margins": 19.57440185546875,
"rewards/real": -5.515534400939941,
"step": 4170
},
{
"epoch": 2.69,
"learning_rate": 5.80814091882885e-08,
"logits/generated": -1.43355393409729,
"logits/real": -1.5794459581375122,
"logps/generated": -708.1492309570312,
"logps/real": -422.21917724609375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -26.978546142578125,
"rewards/margins": 21.33310317993164,
"rewards/real": -5.645444393157959,
"step": 4180
},
{
"epoch": 2.69,
"learning_rate": 5.689121637705308e-08,
"logits/generated": -1.4713923931121826,
"logits/real": -1.6138818264007568,
"logps/generated": -568.523193359375,
"logps/real": -390.0688171386719,
"loss": 0.01,
"rewards/accuracies": 1.0,
"rewards/generated": -23.389972686767578,
"rewards/margins": 17.943958282470703,
"rewards/real": -5.446010589599609,
"step": 4190
},
{
"epoch": 2.7,
"learning_rate": 5.5701023565817666e-08,
"logits/generated": -1.5411484241485596,
"logits/real": -1.5925050973892212,
"logps/generated": -566.3245849609375,
"logps/real": -389.99395751953125,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -23.01774787902832,
"rewards/margins": 18.14521598815918,
"rewards/real": -4.872531890869141,
"step": 4200
},
{
"epoch": 2.71,
"learning_rate": 5.4510830754582236e-08,
"logits/generated": -1.5211843252182007,
"logits/real": -1.6060224771499634,
"logps/generated": -618.6573486328125,
"logps/real": -381.83154296875,
"loss": 0.0014,
"rewards/accuracies": 1.0,
"rewards/generated": -25.276790618896484,
"rewards/margins": 20.00619888305664,
"rewards/real": -5.270589351654053,
"step": 4210
},
{
"epoch": 2.71,
"learning_rate": 5.332063794334682e-08,
"logits/generated": -1.4857370853424072,
"logits/real": -1.5991318225860596,
"logps/generated": -628.9978637695312,
"logps/real": -368.8316955566406,
"loss": 0.0024,
"rewards/accuracies": 1.0,
"rewards/generated": -25.434362411499023,
"rewards/margins": 20.840261459350586,
"rewards/real": -4.59410285949707,
"step": 4220
},
{
"epoch": 2.72,
"learning_rate": 5.21304451321114e-08,
"logits/generated": -1.481233835220337,
"logits/real": -1.616990089416504,
"logps/generated": -612.8431396484375,
"logps/real": -386.7208557128906,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/generated": -23.88302993774414,
"rewards/margins": 19.562503814697266,
"rewards/real": -4.32052755355835,
"step": 4230
},
{
"epoch": 2.72,
"learning_rate": 5.0940252320875985e-08,
"logits/generated": -1.543163537979126,
"logits/real": -1.64615797996521,
"logps/generated": -642.1202392578125,
"logps/real": -387.37042236328125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -22.703163146972656,
"rewards/margins": 18.479257583618164,
"rewards/real": -4.223905086517334,
"step": 4240
},
{
"epoch": 2.73,
"learning_rate": 4.975005950964056e-08,
"logits/generated": -1.5782445669174194,
"logits/real": -1.6280380487442017,
"logps/generated": -670.2909545898438,
"logps/real": -409.8173828125,
"loss": 0.0101,
"rewards/accuracies": 1.0,
"rewards/generated": -25.616764068603516,
"rewards/margins": 20.43697738647461,
"rewards/real": -5.1797871589660645,
"step": 4250
},
{
"epoch": 2.74,
"learning_rate": 4.855986669840514e-08,
"logits/generated": -1.5900815725326538,
"logits/real": -1.621788740158081,
"logps/generated": -621.515625,
"logps/real": -400.93658447265625,
"loss": 0.001,
"rewards/accuracies": 1.0,
"rewards/generated": -24.718387603759766,
"rewards/margins": 18.437381744384766,
"rewards/real": -6.281005859375,
"step": 4260
},
{
"epoch": 2.74,
"learning_rate": 4.736967388716972e-08,
"logits/generated": -1.5760804414749146,
"logits/real": -1.6673088073730469,
"logps/generated": -629.8677978515625,
"logps/real": -367.3181457519531,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -25.90537452697754,
"rewards/margins": 19.712932586669922,
"rewards/real": -6.192440509796143,
"step": 4270
},
{
"epoch": 2.75,
"learning_rate": 4.61794810759343e-08,
"logits/generated": -1.5383670330047607,
"logits/real": -1.6774461269378662,
"logps/generated": -668.8092041015625,
"logps/real": -413.6837463378906,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/generated": -24.660079956054688,
"rewards/margins": 20.008989334106445,
"rewards/real": -4.65109395980835,
"step": 4280
},
{
"epoch": 2.76,
"learning_rate": 4.498928826469888e-08,
"logits/generated": -1.516428828239441,
"logits/real": -1.6732898950576782,
"logps/generated": -604.8695068359375,
"logps/real": -424.62481689453125,
"loss": 0.0033,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -23.037944793701172,
"rewards/margins": 18.540569305419922,
"rewards/real": -4.497374534606934,
"step": 4290
},
{
"epoch": 2.76,
"learning_rate": 4.3799095453463464e-08,
"logits/generated": -1.5560497045516968,
"logits/real": -1.6302626132965088,
"logps/generated": -725.353271484375,
"logps/real": -401.71990966796875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -25.72428321838379,
"rewards/margins": 20.166976928710938,
"rewards/real": -5.557308197021484,
"step": 4300
},
{
"epoch": 2.77,
"learning_rate": 4.2608902642228033e-08,
"logits/generated": -1.5148546695709229,
"logits/real": -1.635724425315857,
"logps/generated": -560.277587890625,
"logps/real": -350.01739501953125,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -22.92128562927246,
"rewards/margins": 17.64234733581543,
"rewards/real": -5.2789411544799805,
"step": 4310
},
{
"epoch": 2.78,
"learning_rate": 4.1418709830992617e-08,
"logits/generated": -1.5678379535675049,
"logits/real": -1.6492674350738525,
"logps/generated": -651.1310424804688,
"logps/real": -392.8611755371094,
"loss": 0.0026,
"rewards/accuracies": 1.0,
"rewards/generated": -23.585779190063477,
"rewards/margins": 17.906591415405273,
"rewards/real": -5.679187297821045,
"step": 4320
},
{
"epoch": 2.78,
"learning_rate": 4.02285170197572e-08,
"logits/generated": -1.5977767705917358,
"logits/real": -1.6969823837280273,
"logps/generated": -596.7791748046875,
"logps/real": -366.9782409667969,
"loss": 0.0039,
"rewards/accuracies": 1.0,
"rewards/generated": -24.06852149963379,
"rewards/margins": 18.731252670288086,
"rewards/real": -5.3372673988342285,
"step": 4330
},
{
"epoch": 2.79,
"learning_rate": 3.903832420852178e-08,
"logits/generated": -1.5980104207992554,
"logits/real": -1.624751329421997,
"logps/generated": -634.3136596679688,
"logps/real": -421.5874938964844,
"loss": 0.0013,
"rewards/accuracies": 1.0,
"rewards/generated": -25.012256622314453,
"rewards/margins": 19.190711975097656,
"rewards/real": -5.82154655456543,
"step": 4340
},
{
"epoch": 2.8,
"learning_rate": 3.784813139728636e-08,
"logits/generated": -1.544721007347107,
"logits/real": -1.617582082748413,
"logps/generated": -652.9552001953125,
"logps/real": -342.71917724609375,
"loss": 0.0023,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -27.350351333618164,
"rewards/margins": 22.945491790771484,
"rewards/real": -4.404857635498047,
"step": 4350
},
{
"epoch": 2.8,
"learning_rate": 3.6657938586050936e-08,
"logits/generated": -1.4935457706451416,
"logits/real": -1.6021337509155273,
"logps/generated": -588.6372680664062,
"logps/real": -402.85235595703125,
"loss": 0.0045,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -23.209640502929688,
"rewards/margins": 19.01520347595215,
"rewards/real": -4.19443416595459,
"step": 4360
},
{
"epoch": 2.81,
"learning_rate": 3.546774577481552e-08,
"logits/generated": -1.4029728174209595,
"logits/real": -1.4847666025161743,
"logps/generated": -645.6001586914062,
"logps/real": -414.8529357910156,
"loss": 0.0025,
"rewards/accuracies": 1.0,
"rewards/generated": -25.182071685791016,
"rewards/margins": 19.215055465698242,
"rewards/real": -5.967015266418457,
"step": 4370
},
{
"epoch": 2.81,
"learning_rate": 3.42775529635801e-08,
"logits/generated": -1.5726209878921509,
"logits/real": -1.7626521587371826,
"logps/generated": -664.4998779296875,
"logps/real": -424.19140625,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -24.57777976989746,
"rewards/margins": 19.955883026123047,
"rewards/real": -4.621894359588623,
"step": 4380
},
{
"epoch": 2.82,
"learning_rate": 3.308736015234468e-08,
"logits/generated": -1.5751293897628784,
"logits/real": -1.6218827962875366,
"logps/generated": -606.050048828125,
"logps/real": -415.70599365234375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -23.84659767150879,
"rewards/margins": 18.291515350341797,
"rewards/real": -5.555081844329834,
"step": 4390
},
{
"epoch": 2.83,
"learning_rate": 3.189716734110926e-08,
"logits/generated": -1.5750287771224976,
"logits/real": -1.6524326801300049,
"logps/generated": -662.5819702148438,
"logps/real": -409.240966796875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -24.22734832763672,
"rewards/margins": 19.554428100585938,
"rewards/real": -4.6729207038879395,
"step": 4400
},
{
"epoch": 2.83,
"learning_rate": 3.070697452987384e-08,
"logits/generated": -1.434731125831604,
"logits/real": -1.5952690839767456,
"logps/generated": -627.177001953125,
"logps/real": -401.0657653808594,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -24.574573516845703,
"rewards/margins": 19.57662582397461,
"rewards/real": -4.99794864654541,
"step": 4410
},
{
"epoch": 2.84,
"learning_rate": 2.9516781718638418e-08,
"logits/generated": -1.5248663425445557,
"logits/real": -1.643877387046814,
"logps/generated": -664.0775146484375,
"logps/real": -432.7522888183594,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/generated": -24.960874557495117,
"rewards/margins": 19.798603057861328,
"rewards/real": -5.162272930145264,
"step": 4420
},
{
"epoch": 2.85,
"learning_rate": 2.8326588907402998e-08,
"logits/generated": -1.6145492792129517,
"logits/real": -1.6489204168319702,
"logps/generated": -638.4390869140625,
"logps/real": -405.95135498046875,
"loss": 0.0024,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -24.591712951660156,
"rewards/margins": 19.004825592041016,
"rewards/real": -5.586886882781982,
"step": 4430
},
{
"epoch": 2.85,
"learning_rate": 2.7136396096167577e-08,
"logits/generated": -1.5042235851287842,
"logits/real": -1.6028741598129272,
"logps/generated": -621.9290161132812,
"logps/real": -364.36456298828125,
"loss": 0.0027,
"rewards/accuracies": 1.0,
"rewards/generated": -26.292139053344727,
"rewards/margins": 20.4296817779541,
"rewards/real": -5.862456321716309,
"step": 4440
},
{
"epoch": 2.86,
"learning_rate": 2.5946203284932157e-08,
"logits/generated": -1.5380438566207886,
"logits/real": -1.6364552974700928,
"logps/generated": -618.8900756835938,
"logps/real": -363.16387939453125,
"loss": 0.0056,
"rewards/accuracies": 1.0,
"rewards/generated": -26.624120712280273,
"rewards/margins": 20.857669830322266,
"rewards/real": -5.766448974609375,
"step": 4450
},
{
"epoch": 2.87,
"learning_rate": 2.475601047369674e-08,
"logits/generated": -1.3604224920272827,
"logits/real": -1.5043797492980957,
"logps/generated": -601.212158203125,
"logps/real": -355.81329345703125,
"loss": 0.0061,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -25.482114791870117,
"rewards/margins": 20.78329086303711,
"rewards/real": -4.698822021484375,
"step": 4460
},
{
"epoch": 2.87,
"learning_rate": 2.3565817662461317e-08,
"logits/generated": -1.4521681070327759,
"logits/real": -1.6199442148208618,
"logps/generated": -686.1697387695312,
"logps/real": -380.40020751953125,
"loss": 0.0022,
"rewards/accuracies": 1.0,
"rewards/generated": -28.074283599853516,
"rewards/margins": 22.777790069580078,
"rewards/real": -5.296494007110596,
"step": 4470
},
{
"epoch": 2.88,
"learning_rate": 2.2375624851225897e-08,
"logits/generated": -1.5777462720870972,
"logits/real": -1.625754714012146,
"logps/generated": -577.4441528320312,
"logps/real": -390.99676513671875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -22.376705169677734,
"rewards/margins": 17.67047119140625,
"rewards/real": -4.706236839294434,
"step": 4480
},
{
"epoch": 2.89,
"learning_rate": 2.1185432039990476e-08,
"logits/generated": -1.5715150833129883,
"logits/real": -1.6618340015411377,
"logps/generated": -650.3167724609375,
"logps/real": -370.7663269042969,
"loss": 0.0027,
"rewards/accuracies": 1.0,
"rewards/generated": -26.569076538085938,
"rewards/margins": 20.809871673583984,
"rewards/real": -5.7592034339904785,
"step": 4490
},
{
"epoch": 2.89,
"learning_rate": 1.9995239228755056e-08,
"logits/generated": -1.4017701148986816,
"logits/real": -1.5704150199890137,
"logps/generated": -623.8563232421875,
"logps/real": -330.9757080078125,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -27.09404945373535,
"rewards/margins": 21.123863220214844,
"rewards/real": -5.970187187194824,
"step": 4500
},
{
"epoch": 2.9,
"learning_rate": 1.880504641751964e-08,
"logits/generated": -1.569045066833496,
"logits/real": -1.689866065979004,
"logps/generated": -648.3955078125,
"logps/real": -410.296630859375,
"loss": 0.0024,
"rewards/accuracies": 1.0,
"rewards/generated": -24.370418548583984,
"rewards/margins": 19.45819091796875,
"rewards/real": -4.912228584289551,
"step": 4510
},
{
"epoch": 2.9,
"learning_rate": 1.7614853606284216e-08,
"logits/generated": -1.5325770378112793,
"logits/real": -1.65109384059906,
"logps/generated": -704.2975463867188,
"logps/real": -418.112060546875,
"loss": 0.0028,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -26.44858169555664,
"rewards/margins": 21.82914161682129,
"rewards/real": -4.619443893432617,
"step": 4520
},
{
"epoch": 2.91,
"learning_rate": 1.64246607950488e-08,
"logits/generated": -1.569603681564331,
"logits/real": -1.6199992895126343,
"logps/generated": -633.3504028320312,
"logps/real": -405.2867126464844,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -24.51732635498047,
"rewards/margins": 19.26373863220215,
"rewards/real": -5.253589153289795,
"step": 4530
},
{
"epoch": 2.92,
"learning_rate": 1.523446798381338e-08,
"logits/generated": -1.538140892982483,
"logits/real": -1.5134851932525635,
"logps/generated": -599.3333740234375,
"logps/real": -339.6556091308594,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -26.23720932006836,
"rewards/margins": 20.19225311279297,
"rewards/real": -6.044954299926758,
"step": 4540
},
{
"epoch": 2.92,
"learning_rate": 1.4044275172577957e-08,
"logits/generated": -1.576467752456665,
"logits/real": -1.7410484552383423,
"logps/generated": -633.7058715820312,
"logps/real": -355.918212890625,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/generated": -26.02840232849121,
"rewards/margins": 21.529172897338867,
"rewards/real": -4.499229907989502,
"step": 4550
},
{
"epoch": 2.93,
"learning_rate": 1.2854082361342537e-08,
"logits/generated": -1.480687141418457,
"logits/real": -1.6588733196258545,
"logps/generated": -614.5526733398438,
"logps/real": -412.12744140625,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -24.828638076782227,
"rewards/margins": 19.214387893676758,
"rewards/real": -5.614253997802734,
"step": 4560
},
{
"epoch": 2.94,
"learning_rate": 1.1663889550107118e-08,
"logits/generated": -1.5496861934661865,
"logits/real": -1.7427030801773071,
"logps/generated": -634.1248168945312,
"logps/real": -392.64141845703125,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/generated": -26.263072967529297,
"rewards/margins": 21.099870681762695,
"rewards/real": -5.163203716278076,
"step": 4570
},
{
"epoch": 2.94,
"learning_rate": 1.0473696738871698e-08,
"logits/generated": -1.5476195812225342,
"logits/real": -1.6109368801116943,
"logps/generated": -661.4473876953125,
"logps/real": -491.182373046875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -26.115245819091797,
"rewards/margins": 19.50821304321289,
"rewards/real": -6.607035160064697,
"step": 4580
},
{
"epoch": 2.95,
"learning_rate": 9.283503927636276e-09,
"logits/generated": -1.514695405960083,
"logits/real": -1.6349430084228516,
"logps/generated": -645.2971801757812,
"logps/real": -423.22979736328125,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -25.907445907592773,
"rewards/margins": 20.990873336791992,
"rewards/real": -4.916577339172363,
"step": 4590
},
{
"epoch": 2.96,
"learning_rate": 8.093311116400856e-09,
"logits/generated": -1.4776127338409424,
"logits/real": -1.5790631771087646,
"logps/generated": -644.6243286132812,
"logps/real": -405.47015380859375,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/generated": -25.59290885925293,
"rewards/margins": 19.970502853393555,
"rewards/real": -5.6224045753479,
"step": 4600
},
{
"epoch": 2.96,
"learning_rate": 6.903118305165436e-09,
"logits/generated": -1.551004409790039,
"logits/real": -1.6629527807235718,
"logps/generated": -686.5955810546875,
"logps/real": -435.12750244140625,
"loss": 0.0025,
"rewards/accuracies": 1.0,
"rewards/generated": -25.463531494140625,
"rewards/margins": 19.223169326782227,
"rewards/real": -6.240363597869873,
"step": 4610
},
{
"epoch": 2.97,
"learning_rate": 5.712925493930016e-09,
"logits/generated": -1.4893418550491333,
"logits/real": -1.675402283668518,
"logps/generated": -659.8289184570312,
"logps/real": -389.3727722167969,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -27.647533416748047,
"rewards/margins": 21.779216766357422,
"rewards/real": -5.868315696716309,
"step": 4620
},
{
"epoch": 2.98,
"learning_rate": 4.522732682694597e-09,
"logits/generated": -1.5567461252212524,
"logits/real": -1.7037324905395508,
"logps/generated": -679.4031982421875,
"logps/real": -418.8092346191406,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -23.965381622314453,
"rewards/margins": 18.415084838867188,
"rewards/real": -5.550297737121582,
"step": 4630
},
{
"epoch": 2.98,
"learning_rate": 3.332539871459176e-09,
"logits/generated": -1.3903148174285889,
"logits/real": -1.492148756980896,
"logps/generated": -663.8366088867188,
"logps/real": -389.20098876953125,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -27.28782081604004,
"rewards/margins": 21.74622344970703,
"rewards/real": -5.541598796844482,
"step": 4640
},
{
"epoch": 2.99,
"learning_rate": 2.1423470602237564e-09,
"logits/generated": -1.5899990797042847,
"logits/real": -1.632591962814331,
"logps/generated": -686.7926025390625,
"logps/real": -420.3905334472656,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -26.265411376953125,
"rewards/margins": 21.27739143371582,
"rewards/real": -4.988020420074463,
"step": 4650
},
{
"epoch": 2.99,
"learning_rate": 9.521542489883362e-10,
"logits/generated": -1.5567301511764526,
"logits/real": -1.6518385410308838,
"logps/generated": -666.6358642578125,
"logps/real": -394.54815673828125,
"loss": 0.01,
"rewards/accuracies": 1.0,
"rewards/generated": -26.553237915039062,
"rewards/margins": 22.035541534423828,
"rewards/real": -4.517697811126709,
"step": 4660
},
{
"epoch": 3.0,
"step": 4668,
"total_flos": 0.0,
"train_loss": 0.04980033338522684,
"train_runtime": 39160.4052,
"train_samples_per_second": 3.814,
"train_steps_per_second": 0.119
}
],
"logging_steps": 10,
"max_steps": 4668,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}