davidberenstein1957's picture
Model save
5dea528 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9983539094650205,
"eval_steps": 25,
"global_step": 1214,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 8.196721311475409e-10,
"logits/generated": -2.0642459392547607,
"logits/real": -2.1011667251586914,
"logps/generated": -767.111328125,
"logps/real": -424.18878173828125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 8.196721311475408e-09,
"logits/generated": -2.0099620819091797,
"logits/real": -2.1245546340942383,
"logps/generated": -645.1455688476562,
"logps/real": -425.1603698730469,
"loss": 0.6885,
"rewards/accuracies": 0.4791666567325592,
"rewards/generated": -0.022459693253040314,
"rewards/margins": 0.015254557132720947,
"rewards/real": -0.007205137051641941,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 1.6393442622950816e-08,
"logits/generated": -2.017244338989258,
"logits/real": -2.1224846839904785,
"logps/generated": -626.304443359375,
"logps/real": -415.47955322265625,
"loss": 0.6097,
"rewards/accuracies": 0.78125,
"rewards/generated": -0.31143561005592346,
"rewards/margins": 0.1948009729385376,
"rewards/real": -0.11663466691970825,
"step": 20
},
{
"epoch": 0.04,
"eval_logits/generated": -2.0004239082336426,
"eval_logits/real": -2.0772550106048584,
"eval_logps/generated": -656.7918701171875,
"eval_logps/real": -449.63409423828125,
"eval_loss": 0.4146920442581177,
"eval_rewards/accuracies": 0.925000011920929,
"eval_rewards/generated": -1.4311684370040894,
"eval_rewards/margins": 0.8120061159133911,
"eval_rewards/real": -0.6191622018814087,
"eval_runtime": 1777.8699,
"eval_samples_per_second": 2.429,
"eval_steps_per_second": 0.076,
"step": 25
},
{
"epoch": 0.05,
"learning_rate": 2.459016393442623e-08,
"logits/generated": -2.018745183944702,
"logits/real": -2.120075225830078,
"logps/generated": -681.8663940429688,
"logps/real": -443.0894470214844,
"loss": 0.4439,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -1.3025258779525757,
"rewards/margins": 0.753677487373352,
"rewards/real": -0.5488484501838684,
"step": 30
},
{
"epoch": 0.07,
"learning_rate": 3.278688524590163e-08,
"logits/generated": -2.003990411758423,
"logits/real": -2.0808629989624023,
"logps/generated": -633.3893432617188,
"logps/real": -416.35333251953125,
"loss": 0.3191,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.1262094974517822,
"rewards/margins": 1.220593810081482,
"rewards/real": -0.9056156277656555,
"step": 40
},
{
"epoch": 0.08,
"learning_rate": 4.0983606557377046e-08,
"logits/generated": -1.9361705780029297,
"logits/real": -2.0647199153900146,
"logps/generated": -706.2001342773438,
"logps/real": -468.58807373046875,
"loss": 0.2137,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -3.8283188343048096,
"rewards/margins": 2.2455239295959473,
"rewards/real": -1.582794427871704,
"step": 50
},
{
"epoch": 0.08,
"eval_logits/generated": -1.9305709600448608,
"eval_logits/real": -2.0236809253692627,
"eval_logps/generated": -692.5404052734375,
"eval_logps/real": -463.74224853515625,
"eval_loss": 0.1745266169309616,
"eval_rewards/accuracies": 0.9518518447875977,
"eval_rewards/generated": -5.006031036376953,
"eval_rewards/margins": 2.9760546684265137,
"eval_rewards/real": -2.029975652694702,
"eval_runtime": 1800.9154,
"eval_samples_per_second": 2.398,
"eval_steps_per_second": 0.075,
"step": 50
},
{
"epoch": 0.1,
"learning_rate": 4.918032786885246e-08,
"logits/generated": -1.953768014907837,
"logits/real": -2.022987127304077,
"logps/generated": -717.5247192382812,
"logps/real": -424.16162109375,
"loss": 0.1354,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.01259708404541,
"rewards/margins": 3.8680121898651123,
"rewards/real": -2.144585371017456,
"step": 60
},
{
"epoch": 0.12,
"learning_rate": 5.7377049180327866e-08,
"logits/generated": -1.8369897603988647,
"logits/real": -1.9798635244369507,
"logps/generated": -716.9135131835938,
"logps/real": -459.76275634765625,
"loss": 0.1292,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.8557868003845215,
"rewards/margins": 4.214533805847168,
"rewards/real": -2.641252040863037,
"step": 70
},
{
"epoch": 0.12,
"eval_logits/generated": -1.884318470954895,
"eval_logits/real": -1.9886623620986938,
"eval_logps/generated": -717.4470825195312,
"eval_logps/real": -471.66973876953125,
"eval_loss": 0.10119830071926117,
"eval_rewards/accuracies": 0.9685184955596924,
"eval_rewards/generated": -7.496694087982178,
"eval_rewards/margins": 4.673972129821777,
"eval_rewards/real": -2.8227217197418213,
"eval_runtime": 1800.5623,
"eval_samples_per_second": 2.398,
"eval_steps_per_second": 0.075,
"step": 75
},
{
"epoch": 0.13,
"learning_rate": 6.557377049180327e-08,
"logits/generated": -1.8934190273284912,
"logits/real": -2.0053441524505615,
"logps/generated": -735.2626953125,
"logps/real": -467.66961669921875,
"loss": 0.0881,
"rewards/accuracies": 1.0,
"rewards/generated": -7.727712154388428,
"rewards/margins": 4.887805461883545,
"rewards/real": -2.8399062156677246,
"step": 80
},
{
"epoch": 0.15,
"learning_rate": 7.377049180327869e-08,
"logits/generated": -1.8700984716415405,
"logits/real": -1.931532859802246,
"logps/generated": -777.4097900390625,
"logps/real": -457.1133728027344,
"loss": 0.075,
"rewards/accuracies": 0.981249988079071,
"rewards/generated": -9.006689071655273,
"rewards/margins": 5.990359306335449,
"rewards/real": -3.0163300037384033,
"step": 90
},
{
"epoch": 0.16,
"learning_rate": 8.196721311475409e-08,
"logits/generated": -1.8540890216827393,
"logits/real": -1.952444076538086,
"logps/generated": -760.2003784179688,
"logps/real": -471.71221923828125,
"loss": 0.0665,
"rewards/accuracies": 0.981249988079071,
"rewards/generated": -8.933283805847168,
"rewards/margins": 5.90076208114624,
"rewards/real": -3.0325207710266113,
"step": 100
},
{
"epoch": 0.16,
"eval_logits/generated": -1.8507987260818481,
"eval_logits/real": -1.9628313779830933,
"eval_logps/generated": -735.65673828125,
"eval_logps/real": -476.3786315917969,
"eval_loss": 0.0675550326704979,
"eval_rewards/accuracies": 0.9777777791023254,
"eval_rewards/generated": -9.317663192749023,
"eval_rewards/margins": 6.0240478515625,
"eval_rewards/real": -3.2936155796051025,
"eval_runtime": 1798.5965,
"eval_samples_per_second": 2.401,
"eval_steps_per_second": 0.075,
"step": 100
},
{
"epoch": 0.18,
"learning_rate": 9.01639344262295e-08,
"logits/generated": -1.7943336963653564,
"logits/real": -1.9300905466079712,
"logps/generated": -762.0491943359375,
"logps/real": -434.4507751464844,
"loss": 0.0579,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -9.933004379272461,
"rewards/margins": 6.8074140548706055,
"rewards/real": -3.1255910396575928,
"step": 110
},
{
"epoch": 0.2,
"learning_rate": 9.836065573770492e-08,
"logits/generated": -1.8353208303451538,
"logits/real": -1.9718765020370483,
"logps/generated": -750.9710693359375,
"logps/real": -449.0281677246094,
"loss": 0.0429,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -10.487658500671387,
"rewards/margins": 7.143439292907715,
"rewards/real": -3.3442184925079346,
"step": 120
},
{
"epoch": 0.21,
"eval_logits/generated": -1.8123193979263306,
"eval_logits/real": -1.9332078695297241,
"eval_logps/generated": -755.2024536132812,
"eval_logps/real": -480.7701110839844,
"eval_loss": 0.04767724126577377,
"eval_rewards/accuracies": 0.9824073910713196,
"eval_rewards/generated": -11.27223014831543,
"eval_rewards/margins": 7.53946590423584,
"eval_rewards/real": -3.73276424407959,
"eval_runtime": 1803.3715,
"eval_samples_per_second": 2.394,
"eval_steps_per_second": 0.075,
"step": 125
},
{
"epoch": 0.21,
"learning_rate": 9.926739926739926e-08,
"logits/generated": -1.8151371479034424,
"logits/real": -1.9583898782730103,
"logps/generated": -810.3426513671875,
"logps/real": -501.2919921875,
"loss": 0.0431,
"rewards/accuracies": 1.0,
"rewards/generated": -11.520541191101074,
"rewards/margins": 7.8352227210998535,
"rewards/real": -3.6853184700012207,
"step": 130
},
{
"epoch": 0.23,
"learning_rate": 9.835164835164835e-08,
"logits/generated": -1.8159958124160767,
"logits/real": -1.9128528833389282,
"logps/generated": -802.4890747070312,
"logps/real": -464.2137756347656,
"loss": 0.0511,
"rewards/accuracies": 0.981249988079071,
"rewards/generated": -12.06971263885498,
"rewards/margins": 8.350500106811523,
"rewards/real": -3.719212055206299,
"step": 140
},
{
"epoch": 0.25,
"learning_rate": 9.743589743589743e-08,
"logits/generated": -1.754547119140625,
"logits/real": -1.9312493801116943,
"logps/generated": -814.783935546875,
"logps/real": -467.99609375,
"loss": 0.0299,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -13.577325820922852,
"rewards/margins": 9.742910385131836,
"rewards/real": -3.83441424369812,
"step": 150
},
{
"epoch": 0.25,
"eval_logits/generated": -1.7938494682312012,
"eval_logits/real": -1.9225581884384155,
"eval_logps/generated": -775.0787353515625,
"eval_logps/real": -485.6038818359375,
"eval_loss": 0.036931850016117096,
"eval_rewards/accuracies": 0.9870370626449585,
"eval_rewards/generated": -13.259866714477539,
"eval_rewards/margins": 9.043731689453125,
"eval_rewards/real": -4.216136932373047,
"eval_runtime": 1778.5818,
"eval_samples_per_second": 2.428,
"eval_steps_per_second": 0.076,
"step": 150
},
{
"epoch": 0.26,
"learning_rate": 9.652014652014652e-08,
"logits/generated": -1.7989473342895508,
"logits/real": -1.9711263179779053,
"logps/generated": -816.1602783203125,
"logps/real": -429.8924865722656,
"loss": 0.0275,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -14.095451354980469,
"rewards/margins": 10.274964332580566,
"rewards/real": -3.820486545562744,
"step": 160
},
{
"epoch": 0.28,
"learning_rate": 9.56043956043956e-08,
"logits/generated": -1.8312492370605469,
"logits/real": -1.9516799449920654,
"logps/generated": -803.2518920898438,
"logps/real": -467.13983154296875,
"loss": 0.0252,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -13.788885116577148,
"rewards/margins": 9.561357498168945,
"rewards/real": -4.227527618408203,
"step": 170
},
{
"epoch": 0.29,
"eval_logits/generated": -1.7758067846298218,
"eval_logits/real": -1.9115736484527588,
"eval_logps/generated": -792.9690551757812,
"eval_logps/real": -490.6431579589844,
"eval_loss": 0.03204120323061943,
"eval_rewards/accuracies": 0.9879629611968994,
"eval_rewards/generated": -15.048893928527832,
"eval_rewards/margins": 10.328824043273926,
"eval_rewards/real": -4.720070838928223,
"eval_runtime": 1779.5528,
"eval_samples_per_second": 2.426,
"eval_steps_per_second": 0.076,
"step": 175
},
{
"epoch": 0.3,
"learning_rate": 9.468864468864468e-08,
"logits/generated": -1.8363538980484009,
"logits/real": -1.9760059118270874,
"logps/generated": -798.1632690429688,
"logps/real": -465.4309997558594,
"loss": 0.018,
"rewards/accuracies": 1.0,
"rewards/generated": -13.945306777954102,
"rewards/margins": 9.537097930908203,
"rewards/real": -4.40820837020874,
"step": 180
},
{
"epoch": 0.31,
"learning_rate": 9.377289377289377e-08,
"logits/generated": -1.7771434783935547,
"logits/real": -1.918859839439392,
"logps/generated": -778.4981689453125,
"logps/real": -445.9978942871094,
"loss": 0.0154,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -15.705945014953613,
"rewards/margins": 11.227587699890137,
"rewards/real": -4.478354454040527,
"step": 190
},
{
"epoch": 0.33,
"learning_rate": 9.285714285714286e-08,
"logits/generated": -1.7843818664550781,
"logits/real": -1.9129893779754639,
"logps/generated": -816.643310546875,
"logps/real": -449.44171142578125,
"loss": 0.0249,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -16.523761749267578,
"rewards/margins": 11.881709098815918,
"rewards/real": -4.642051696777344,
"step": 200
},
{
"epoch": 0.33,
"eval_logits/generated": -1.7515002489089966,
"eval_logits/real": -1.8923099040985107,
"eval_logps/generated": -806.0497436523438,
"eval_logps/real": -494.1994934082031,
"eval_loss": 0.030071575194597244,
"eval_rewards/accuracies": 0.9879629611968994,
"eval_rewards/generated": -16.356964111328125,
"eval_rewards/margins": 11.28126049041748,
"eval_rewards/real": -5.0757036209106445,
"eval_runtime": 1798.4488,
"eval_samples_per_second": 2.401,
"eval_steps_per_second": 0.075,
"step": 200
},
{
"epoch": 0.35,
"learning_rate": 9.194139194139193e-08,
"logits/generated": -1.7697616815567017,
"logits/real": -1.9165000915527344,
"logps/generated": -853.5462646484375,
"logps/real": -462.77484130859375,
"loss": 0.0245,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -18.052305221557617,
"rewards/margins": 13.306139945983887,
"rewards/real": -4.746166229248047,
"step": 210
},
{
"epoch": 0.36,
"learning_rate": 9.102564102564102e-08,
"logits/generated": -1.7060960531234741,
"logits/real": -1.8868176937103271,
"logps/generated": -852.2977294921875,
"logps/real": -465.27099609375,
"loss": 0.0175,
"rewards/accuracies": 1.0,
"rewards/generated": -19.251428604125977,
"rewards/margins": 14.104260444641113,
"rewards/real": -5.1471662521362305,
"step": 220
},
{
"epoch": 0.37,
"eval_logits/generated": -1.7361782789230347,
"eval_logits/real": -1.8820877075195312,
"eval_logps/generated": -819.2310180664062,
"eval_logps/real": -497.7419128417969,
"eval_loss": 0.027269212529063225,
"eval_rewards/accuracies": 0.9879629611968994,
"eval_rewards/generated": -17.67508888244629,
"eval_rewards/margins": 12.245142936706543,
"eval_rewards/real": -5.4299445152282715,
"eval_runtime": 1798.3015,
"eval_samples_per_second": 2.401,
"eval_steps_per_second": 0.075,
"step": 225
},
{
"epoch": 0.38,
"learning_rate": 9.010989010989011e-08,
"logits/generated": -1.7464491128921509,
"logits/real": -1.911118507385254,
"logps/generated": -843.2506103515625,
"logps/real": -463.8089294433594,
"loss": 0.0137,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -18.588809967041016,
"rewards/margins": 13.585103034973145,
"rewards/real": -5.0037055015563965,
"step": 230
},
{
"epoch": 0.4,
"learning_rate": 8.91941391941392e-08,
"logits/generated": -1.7304248809814453,
"logits/real": -1.868173360824585,
"logps/generated": -819.3607177734375,
"logps/real": -445.7488708496094,
"loss": 0.0219,
"rewards/accuracies": 0.981249988079071,
"rewards/generated": -17.680866241455078,
"rewards/margins": 12.903231620788574,
"rewards/real": -4.777635097503662,
"step": 240
},
{
"epoch": 0.41,
"learning_rate": 8.827838827838827e-08,
"logits/generated": -1.7114464044570923,
"logits/real": -1.8701032400131226,
"logps/generated": -823.8060302734375,
"logps/real": -483.9420471191406,
"loss": 0.0183,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -17.834732055664062,
"rewards/margins": 12.7833251953125,
"rewards/real": -5.051407814025879,
"step": 250
},
{
"epoch": 0.41,
"eval_logits/generated": -1.729956865310669,
"eval_logits/real": -1.8793208599090576,
"eval_logps/generated": -826.3790893554688,
"eval_logps/real": -497.6258544921875,
"eval_loss": 0.025423016399145126,
"eval_rewards/accuracies": 0.9888888597488403,
"eval_rewards/generated": -18.389890670776367,
"eval_rewards/margins": 12.971549034118652,
"eval_rewards/real": -5.418341159820557,
"eval_runtime": 1801.3119,
"eval_samples_per_second": 2.397,
"eval_steps_per_second": 0.075,
"step": 250
},
{
"epoch": 0.43,
"learning_rate": 8.736263736263736e-08,
"logits/generated": -1.7352432012557983,
"logits/real": -1.9060261249542236,
"logps/generated": -870.6500854492188,
"logps/real": -461.4039611816406,
"loss": 0.0111,
"rewards/accuracies": 1.0,
"rewards/generated": -18.913497924804688,
"rewards/margins": 14.00297737121582,
"rewards/real": -4.910521030426025,
"step": 260
},
{
"epoch": 0.44,
"learning_rate": 8.644688644688645e-08,
"logits/generated": -1.784847617149353,
"logits/real": -1.9353469610214233,
"logps/generated": -850.8590087890625,
"logps/real": -482.2189025878906,
"loss": 0.0182,
"rewards/accuracies": 0.981249988079071,
"rewards/generated": -19.183507919311523,
"rewards/margins": 13.948068618774414,
"rewards/real": -5.235440731048584,
"step": 270
},
{
"epoch": 0.45,
"eval_logits/generated": -1.696116328239441,
"eval_logits/real": -1.8563601970672607,
"eval_logps/generated": -848.2400512695312,
"eval_logps/real": -504.34259033203125,
"eval_loss": 0.024484921246767044,
"eval_rewards/accuracies": 0.9888888597488403,
"eval_rewards/generated": -20.575990676879883,
"eval_rewards/margins": 14.485980033874512,
"eval_rewards/real": -6.09001350402832,
"eval_runtime": 1801.2175,
"eval_samples_per_second": 2.397,
"eval_steps_per_second": 0.075,
"step": 275
},
{
"epoch": 0.46,
"learning_rate": 8.553113553113552e-08,
"logits/generated": -1.6885887384414673,
"logits/real": -1.873110055923462,
"logps/generated": -874.4225463867188,
"logps/real": -485.9969177246094,
"loss": 0.0225,
"rewards/accuracies": 0.981249988079071,
"rewards/generated": -21.401386260986328,
"rewards/margins": 15.265310287475586,
"rewards/real": -6.136077404022217,
"step": 280
},
{
"epoch": 0.48,
"learning_rate": 8.461538461538461e-08,
"logits/generated": -1.7388379573822021,
"logits/real": -1.8577735424041748,
"logps/generated": -902.2374267578125,
"logps/real": -491.22247314453125,
"loss": 0.0217,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -20.343700408935547,
"rewards/margins": 14.47362232208252,
"rewards/real": -5.870078086853027,
"step": 290
},
{
"epoch": 0.49,
"learning_rate": 8.36996336996337e-08,
"logits/generated": -1.760310173034668,
"logits/real": -1.9213718175888062,
"logps/generated": -855.2713623046875,
"logps/real": -482.796875,
"loss": 0.0253,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -20.608051300048828,
"rewards/margins": 15.096084594726562,
"rewards/real": -5.511966228485107,
"step": 300
},
{
"epoch": 0.49,
"eval_logits/generated": -1.693785309791565,
"eval_logits/real": -1.8572747707366943,
"eval_logps/generated": -849.6640014648438,
"eval_logps/real": -502.681884765625,
"eval_loss": 0.02240588143467903,
"eval_rewards/accuracies": 0.989814817905426,
"eval_rewards/generated": -20.7183895111084,
"eval_rewards/margins": 14.79444694519043,
"eval_rewards/real": -5.923939228057861,
"eval_runtime": 1798.8833,
"eval_samples_per_second": 2.4,
"eval_steps_per_second": 0.075,
"step": 300
},
{
"epoch": 0.51,
"learning_rate": 8.278388278388278e-08,
"logits/generated": -1.6374238729476929,
"logits/real": -1.8183997869491577,
"logps/generated": -923.6209106445312,
"logps/real": -475.9380798339844,
"loss": 0.1301,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -25.12307357788086,
"rewards/margins": 18.438941955566406,
"rewards/real": -6.684133052825928,
"step": 310
},
{
"epoch": 0.53,
"learning_rate": 8.186813186813186e-08,
"logits/generated": -1.6634056568145752,
"logits/real": -1.8855922222137451,
"logps/generated": -906.6611328125,
"logps/real": -488.73858642578125,
"loss": 0.0075,
"rewards/accuracies": 1.0,
"rewards/generated": -24.581295013427734,
"rewards/margins": 18.07442283630371,
"rewards/real": -6.50687313079834,
"step": 320
},
{
"epoch": 0.53,
"eval_logits/generated": -1.6521793603897095,
"eval_logits/real": -1.8252357244491577,
"eval_logps/generated": -883.6064453125,
"eval_logps/real": -513.8781127929688,
"eval_loss": 0.023403111845254898,
"eval_rewards/accuracies": 0.989814817905426,
"eval_rewards/generated": -24.112627029418945,
"eval_rewards/margins": 17.069059371948242,
"eval_rewards/real": -7.043565273284912,
"eval_runtime": 1801.6344,
"eval_samples_per_second": 2.397,
"eval_steps_per_second": 0.075,
"step": 325
},
{
"epoch": 0.54,
"learning_rate": 8.095238095238095e-08,
"logits/generated": -1.585889458656311,
"logits/real": -1.804424524307251,
"logps/generated": -845.4251708984375,
"logps/real": -472.7271423339844,
"loss": 0.0545,
"rewards/accuracies": 0.96875,
"rewards/generated": -23.844438552856445,
"rewards/margins": 17.44953727722168,
"rewards/real": -6.39490270614624,
"step": 330
},
{
"epoch": 0.56,
"learning_rate": 8.003663003663003e-08,
"logits/generated": -1.6383155584335327,
"logits/real": -1.8644497394561768,
"logps/generated": -858.1883544921875,
"logps/real": -480.23931884765625,
"loss": 0.0129,
"rewards/accuracies": 1.0,
"rewards/generated": -20.641630172729492,
"rewards/margins": 15.211410522460938,
"rewards/real": -5.4302215576171875,
"step": 340
},
{
"epoch": 0.58,
"learning_rate": 7.912087912087911e-08,
"logits/generated": -1.7638896703720093,
"logits/real": -1.9181245565414429,
"logps/generated": -808.9601440429688,
"logps/real": -457.7825622558594,
"loss": 0.0141,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.34470558166504,
"rewards/margins": 14.01134204864502,
"rewards/real": -5.333361625671387,
"step": 350
},
{
"epoch": 0.58,
"eval_logits/generated": -1.7082347869873047,
"eval_logits/real": -1.8693056106567383,
"eval_logps/generated": -852.1936645507812,
"eval_logps/real": -499.138671875,
"eval_loss": 0.021183772012591362,
"eval_rewards/accuracies": 0.989814817905426,
"eval_rewards/generated": -20.971355438232422,
"eval_rewards/margins": 15.401734352111816,
"eval_rewards/real": -5.569622039794922,
"eval_runtime": 1777.6314,
"eval_samples_per_second": 2.429,
"eval_steps_per_second": 0.076,
"step": 350
},
{
"epoch": 0.59,
"learning_rate": 7.82051282051282e-08,
"logits/generated": -1.7445008754730225,
"logits/real": -1.909597396850586,
"logps/generated": -885.7131958007812,
"logps/real": -475.4231872558594,
"loss": 0.0247,
"rewards/accuracies": 1.0,
"rewards/generated": -21.5659236907959,
"rewards/margins": 16.31867218017578,
"rewards/real": -5.247251033782959,
"step": 360
},
{
"epoch": 0.61,
"learning_rate": 7.72893772893773e-08,
"logits/generated": -1.7469732761383057,
"logits/real": -1.8687480688095093,
"logps/generated": -842.2507934570312,
"logps/real": -467.54583740234375,
"loss": 0.0135,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -19.440731048583984,
"rewards/margins": 14.80817985534668,
"rewards/real": -4.632552146911621,
"step": 370
},
{
"epoch": 0.62,
"eval_logits/generated": -1.7284820079803467,
"eval_logits/real": -1.8896727561950684,
"eval_logps/generated": -846.3809204101562,
"eval_logps/real": -496.0889587402344,
"eval_loss": 0.018172312527894974,
"eval_rewards/accuracies": 0.9907407164573669,
"eval_rewards/generated": -20.39007568359375,
"eval_rewards/margins": 15.125428199768066,
"eval_rewards/real": -5.264645099639893,
"eval_runtime": 1804.3242,
"eval_samples_per_second": 2.393,
"eval_steps_per_second": 0.075,
"step": 375
},
{
"epoch": 0.63,
"learning_rate": 7.637362637362636e-08,
"logits/generated": -1.7371108531951904,
"logits/real": -1.9044015407562256,
"logps/generated": -852.88427734375,
"logps/real": -489.7041015625,
"loss": 0.0123,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -20.25876235961914,
"rewards/margins": 15.011631965637207,
"rewards/real": -5.247129917144775,
"step": 380
},
{
"epoch": 0.64,
"learning_rate": 7.545787545787545e-08,
"logits/generated": -1.7183958292007446,
"logits/real": -1.8460171222686768,
"logps/generated": -853.3533935546875,
"logps/real": -476.1839904785156,
"loss": 0.0069,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -21.06509017944336,
"rewards/margins": 15.901025772094727,
"rewards/real": -5.164063453674316,
"step": 390
},
{
"epoch": 0.66,
"learning_rate": 7.454212454212454e-08,
"logits/generated": -1.7633212804794312,
"logits/real": -1.9220634698867798,
"logps/generated": -842.59765625,
"logps/real": -468.2982482910156,
"loss": 0.014,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -20.100887298583984,
"rewards/margins": 15.023529052734375,
"rewards/real": -5.077359199523926,
"step": 400
},
{
"epoch": 0.66,
"eval_logits/generated": -1.7137374877929688,
"eval_logits/real": -1.8782566785812378,
"eval_logps/generated": -854.0593872070312,
"eval_logps/real": -498.4993591308594,
"eval_loss": 0.01818298175930977,
"eval_rewards/accuracies": 0.9907407164573669,
"eval_rewards/generated": -21.157926559448242,
"eval_rewards/margins": 15.652240753173828,
"eval_rewards/real": -5.505686283111572,
"eval_runtime": 1801.4399,
"eval_samples_per_second": 2.397,
"eval_steps_per_second": 0.075,
"step": 400
},
{
"epoch": 0.67,
"learning_rate": 7.362637362637363e-08,
"logits/generated": -1.7656316757202148,
"logits/real": -1.9041885137557983,
"logps/generated": -824.7591552734375,
"logps/real": -444.58935546875,
"loss": 0.0229,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -20.36834144592285,
"rewards/margins": 15.581771850585938,
"rewards/real": -4.786566734313965,
"step": 410
},
{
"epoch": 0.69,
"learning_rate": 7.27106227106227e-08,
"logits/generated": -1.6781879663467407,
"logits/real": -1.8786585330963135,
"logps/generated": -863.8435668945312,
"logps/real": -460.38494873046875,
"loss": 0.0122,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -22.69711685180664,
"rewards/margins": 17.59657859802246,
"rewards/real": -5.100537300109863,
"step": 420
},
{
"epoch": 0.7,
"eval_logits/generated": -1.7230830192565918,
"eval_logits/real": -1.8856515884399414,
"eval_logps/generated": -849.9996948242188,
"eval_logps/real": -496.84051513671875,
"eval_loss": 0.017169104889035225,
"eval_rewards/accuracies": 0.9907407164573669,
"eval_rewards/generated": -20.751964569091797,
"eval_rewards/margins": 15.412163734436035,
"eval_rewards/real": -5.33980131149292,
"eval_runtime": 1779.5809,
"eval_samples_per_second": 2.426,
"eval_steps_per_second": 0.076,
"step": 425
},
{
"epoch": 0.71,
"learning_rate": 7.17948717948718e-08,
"logits/generated": -1.7307789325714111,
"logits/real": -1.8954929113388062,
"logps/generated": -858.5565185546875,
"logps/real": -445.24554443359375,
"loss": 0.018,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -20.846343994140625,
"rewards/margins": 16.011089324951172,
"rewards/real": -4.835254669189453,
"step": 430
},
{
"epoch": 0.72,
"learning_rate": 7.087912087912088e-08,
"logits/generated": -1.7022396326065063,
"logits/real": -1.8817275762557983,
"logps/generated": -864.0067138671875,
"logps/real": -429.09539794921875,
"loss": 0.0169,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -20.980777740478516,
"rewards/margins": 16.663347244262695,
"rewards/real": -4.317431449890137,
"step": 440
},
{
"epoch": 0.74,
"learning_rate": 6.996336996336996e-08,
"logits/generated": -1.8108078241348267,
"logits/real": -1.9502532482147217,
"logps/generated": -838.1130981445312,
"logps/real": -445.90008544921875,
"loss": 0.0144,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -19.511409759521484,
"rewards/margins": 15.326568603515625,
"rewards/real": -4.184841632843018,
"step": 450
},
{
"epoch": 0.74,
"eval_logits/generated": -1.7465310096740723,
"eval_logits/real": -1.9042091369628906,
"eval_logps/generated": -836.2462768554688,
"eval_logps/real": -490.04827880859375,
"eval_loss": 0.016420260071754456,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -19.37661361694336,
"eval_rewards/margins": 14.716034889221191,
"eval_rewards/real": -4.660578727722168,
"eval_runtime": 1791.4683,
"eval_samples_per_second": 2.41,
"eval_steps_per_second": 0.075,
"step": 450
},
{
"epoch": 0.76,
"learning_rate": 6.904761904761905e-08,
"logits/generated": -1.7178394794464111,
"logits/real": -1.9164073467254639,
"logps/generated": -814.5615844726562,
"logps/real": -447.05029296875,
"loss": 0.0206,
"rewards/accuracies": 1.0,
"rewards/generated": -18.99026870727539,
"rewards/margins": 14.712194442749023,
"rewards/real": -4.278077125549316,
"step": 460
},
{
"epoch": 0.77,
"learning_rate": 6.813186813186813e-08,
"logits/generated": -1.7678531408309937,
"logits/real": -1.9162557125091553,
"logps/generated": -866.0538330078125,
"logps/real": -465.385986328125,
"loss": 0.0103,
"rewards/accuracies": 1.0,
"rewards/generated": -20.878631591796875,
"rewards/margins": 16.432056427001953,
"rewards/real": -4.44657564163208,
"step": 470
},
{
"epoch": 0.78,
"eval_logits/generated": -1.744537353515625,
"eval_logits/real": -1.9063953161239624,
"eval_logps/generated": -843.5385131835938,
"eval_logps/real": -492.18194580078125,
"eval_loss": 0.015999892726540565,
"eval_rewards/accuracies": 0.9907407164573669,
"eval_rewards/generated": -20.1058349609375,
"eval_rewards/margins": 15.231893539428711,
"eval_rewards/real": -4.873941421508789,
"eval_runtime": 1802.1278,
"eval_samples_per_second": 2.396,
"eval_steps_per_second": 0.075,
"step": 475
},
{
"epoch": 0.79,
"learning_rate": 6.721611721611721e-08,
"logits/generated": -1.7018417119979858,
"logits/real": -1.8882300853729248,
"logps/generated": -894.1951293945312,
"logps/real": -452.98052978515625,
"loss": 0.0082,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -22.061939239501953,
"rewards/margins": 17.689193725585938,
"rewards/real": -4.372746467590332,
"step": 480
},
{
"epoch": 0.81,
"learning_rate": 6.63003663003663e-08,
"logits/generated": -1.7523149251937866,
"logits/real": -1.9084774255752563,
"logps/generated": -886.0391845703125,
"logps/real": -463.77880859375,
"loss": 0.0116,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -21.94992446899414,
"rewards/margins": 17.210201263427734,
"rewards/real": -4.739726543426514,
"step": 490
},
{
"epoch": 0.82,
"learning_rate": 6.538461538461538e-08,
"logits/generated": -1.7328628301620483,
"logits/real": -1.9376299381256104,
"logps/generated": -842.7811279296875,
"logps/real": -459.4143981933594,
"loss": 0.0147,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -21.233800888061523,
"rewards/margins": 16.3835391998291,
"rewards/real": -4.850262641906738,
"step": 500
},
{
"epoch": 0.82,
"eval_logits/generated": -1.7434035539627075,
"eval_logits/real": -1.9092177152633667,
"eval_logps/generated": -852.0874633789062,
"eval_logps/real": -494.6623229980469,
"eval_loss": 0.015602019615471363,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -20.96072769165039,
"eval_rewards/margins": 15.838738441467285,
"eval_rewards/real": -5.121987342834473,
"eval_runtime": 1801.3586,
"eval_samples_per_second": 2.397,
"eval_steps_per_second": 0.075,
"step": 500
},
{
"epoch": 0.84,
"learning_rate": 6.446886446886448e-08,
"logits/generated": -1.7443310022354126,
"logits/real": -1.906089186668396,
"logps/generated": -855.3189697265625,
"logps/real": -479.55206298828125,
"loss": 0.017,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -21.070411682128906,
"rewards/margins": 15.907896041870117,
"rewards/real": -5.162516117095947,
"step": 510
},
{
"epoch": 0.86,
"learning_rate": 6.355311355311355e-08,
"logits/generated": -1.7915077209472656,
"logits/real": -1.96005117893219,
"logps/generated": -909.4000854492188,
"logps/real": -510.1407165527344,
"loss": 0.0154,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -22.668399810791016,
"rewards/margins": 17.420974731445312,
"rewards/real": -5.247425079345703,
"step": 520
},
{
"epoch": 0.86,
"eval_logits/generated": -1.7357203960418701,
"eval_logits/real": -1.903997778892517,
"eval_logps/generated": -856.4739990234375,
"eval_logps/real": -494.9234924316406,
"eval_loss": 0.015464075841009617,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -21.399391174316406,
"eval_rewards/margins": 16.25129508972168,
"eval_rewards/real": -5.148096561431885,
"eval_runtime": 1799.2565,
"eval_samples_per_second": 2.4,
"eval_steps_per_second": 0.075,
"step": 525
},
{
"epoch": 0.87,
"learning_rate": 6.263736263736263e-08,
"logits/generated": -1.7220814228057861,
"logits/real": -1.896211862564087,
"logps/generated": -864.8518676757812,
"logps/real": -481.7032775878906,
"loss": 0.0081,
"rewards/accuracies": 1.0,
"rewards/generated": -21.411062240600586,
"rewards/margins": 16.603487014770508,
"rewards/real": -4.807575702667236,
"step": 530
},
{
"epoch": 0.89,
"learning_rate": 6.172161172161173e-08,
"logits/generated": -1.7355706691741943,
"logits/real": -1.9386104345321655,
"logps/generated": -856.8909301757812,
"logps/real": -455.5501403808594,
"loss": 0.0102,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -21.787107467651367,
"rewards/margins": 16.66501808166504,
"rewards/real": -5.122087001800537,
"step": 540
},
{
"epoch": 0.91,
"learning_rate": 6.08058608058608e-08,
"logits/generated": -1.6525169610977173,
"logits/real": -1.872809648513794,
"logps/generated": -870.7745361328125,
"logps/real": -459.68572998046875,
"loss": 0.0158,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -21.89577865600586,
"rewards/margins": 16.658113479614258,
"rewards/real": -5.237664222717285,
"step": 550
},
{
"epoch": 0.91,
"eval_logits/generated": -1.713934302330017,
"eval_logits/real": -1.8881142139434814,
"eval_logps/generated": -872.0122680664062,
"eval_logps/real": -499.5303955078125,
"eval_loss": 0.015055526979267597,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -22.95322036743164,
"eval_rewards/margins": 17.34442710876465,
"eval_rewards/real": -5.608795166015625,
"eval_runtime": 1796.1592,
"eval_samples_per_second": 2.404,
"eval_steps_per_second": 0.075,
"step": 550
},
{
"epoch": 0.92,
"learning_rate": 5.989010989010988e-08,
"logits/generated": -1.7358171939849854,
"logits/real": -1.907268762588501,
"logps/generated": -841.7224731445312,
"logps/real": -471.75689697265625,
"loss": 0.0081,
"rewards/accuracies": 1.0,
"rewards/generated": -21.611263275146484,
"rewards/margins": 16.323144912719727,
"rewards/real": -5.288116455078125,
"step": 560
},
{
"epoch": 0.94,
"learning_rate": 5.897435897435897e-08,
"logits/generated": -1.7383735179901123,
"logits/real": -1.929496169090271,
"logps/generated": -876.6693115234375,
"logps/real": -483.697998046875,
"loss": 0.0053,
"rewards/accuracies": 1.0,
"rewards/generated": -23.30849838256836,
"rewards/margins": 17.91643714904785,
"rewards/real": -5.392062664031982,
"step": 570
},
{
"epoch": 0.95,
"eval_logits/generated": -1.7112655639648438,
"eval_logits/real": -1.8888392448425293,
"eval_logps/generated": -877.6972045898438,
"eval_logps/real": -500.6514587402344,
"eval_loss": 0.01491004228591919,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -23.521709442138672,
"eval_rewards/margins": 17.800806045532227,
"eval_rewards/real": -5.7208991050720215,
"eval_runtime": 1799.8979,
"eval_samples_per_second": 2.399,
"eval_steps_per_second": 0.075,
"step": 575
},
{
"epoch": 0.95,
"learning_rate": 5.805860805860806e-08,
"logits/generated": -1.7410743236541748,
"logits/real": -1.901346206665039,
"logps/generated": -881.9417724609375,
"logps/real": -473.64892578125,
"loss": 0.0153,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -23.52071762084961,
"rewards/margins": 17.951566696166992,
"rewards/real": -5.569148540496826,
"step": 580
},
{
"epoch": 0.97,
"learning_rate": 5.714285714285714e-08,
"logits/generated": -1.7443621158599854,
"logits/real": -1.8728523254394531,
"logps/generated": -892.2806396484375,
"logps/real": -453.60552978515625,
"loss": 0.015,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -23.209545135498047,
"rewards/margins": 18.014381408691406,
"rewards/real": -5.195165157318115,
"step": 590
},
{
"epoch": 0.99,
"learning_rate": 5.622710622710623e-08,
"logits/generated": -1.7170673608779907,
"logits/real": -1.889995813369751,
"logps/generated": -875.4261474609375,
"logps/real": -450.47576904296875,
"loss": 0.008,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -23.910694122314453,
"rewards/margins": 18.570858001708984,
"rewards/real": -5.339831352233887,
"step": 600
},
{
"epoch": 0.99,
"eval_logits/generated": -1.708635926246643,
"eval_logits/real": -1.8878159523010254,
"eval_logps/generated": -879.9544067382812,
"eval_logps/real": -500.9651184082031,
"eval_loss": 0.01472516916692257,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -23.74742317199707,
"eval_rewards/margins": 17.995161056518555,
"eval_rewards/real": -5.752264022827148,
"eval_runtime": 1800.102,
"eval_samples_per_second": 2.399,
"eval_steps_per_second": 0.075,
"step": 600
},
{
"epoch": 1.0,
"learning_rate": 5.531135531135531e-08,
"logits/generated": -1.6815847158432007,
"logits/real": -1.917245626449585,
"logps/generated": -922.5784912109375,
"logps/real": -475.2225646972656,
"loss": 0.0094,
"rewards/accuracies": 1.0,
"rewards/generated": -25.03234100341797,
"rewards/margins": 19.452983856201172,
"rewards/real": -5.579358100891113,
"step": 610
},
{
"epoch": 1.02,
"learning_rate": 5.439560439560439e-08,
"logits/generated": -1.7002710103988647,
"logits/real": -1.886687994003296,
"logps/generated": -922.806640625,
"logps/real": -481.56787109375,
"loss": 0.0049,
"rewards/accuracies": 1.0,
"rewards/generated": -25.185983657836914,
"rewards/margins": 19.83902359008789,
"rewards/real": -5.346956729888916,
"step": 620
},
{
"epoch": 1.03,
"eval_logits/generated": -1.6730928421020508,
"eval_logits/real": -1.8584686517715454,
"eval_logps/generated": -891.3632202148438,
"eval_logps/real": -505.2818298339844,
"eval_loss": 0.015368033200502396,
"eval_rewards/accuracies": 0.9907407164573669,
"eval_rewards/generated": -24.888301849365234,
"eval_rewards/margins": 18.704362869262695,
"eval_rewards/real": -6.183938026428223,
"eval_runtime": 1782.9432,
"eval_samples_per_second": 2.422,
"eval_steps_per_second": 0.076,
"step": 625
},
{
"epoch": 1.04,
"learning_rate": 5.347985347985348e-08,
"logits/generated": -1.6648222208023071,
"logits/real": -1.8549429178237915,
"logps/generated": -912.20654296875,
"logps/real": -520.7745361328125,
"loss": 0.004,
"rewards/accuracies": 1.0,
"rewards/generated": -24.70351791381836,
"rewards/margins": 18.5965518951416,
"rewards/real": -6.106965065002441,
"step": 630
},
{
"epoch": 1.05,
"learning_rate": 5.256410256410256e-08,
"logits/generated": -1.704904556274414,
"logits/real": -1.8857839107513428,
"logps/generated": -927.8040771484375,
"logps/real": -481.84820556640625,
"loss": 0.0069,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -26.38739585876465,
"rewards/margins": 20.759992599487305,
"rewards/real": -5.627402305603027,
"step": 640
},
{
"epoch": 1.07,
"learning_rate": 5.164835164835165e-08,
"logits/generated": -1.7228724956512451,
"logits/real": -1.8705856800079346,
"logps/generated": -894.1580810546875,
"logps/real": -462.95697021484375,
"loss": 0.0057,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -24.506175994873047,
"rewards/margins": 18.540332794189453,
"rewards/real": -5.96584415435791,
"step": 650
},
{
"epoch": 1.07,
"eval_logits/generated": -1.6592012643814087,
"eval_logits/real": -1.848427176475525,
"eval_logps/generated": -901.4036865234375,
"eval_logps/real": -508.3891906738281,
"eval_loss": 0.015495581552386284,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -25.892351150512695,
"eval_rewards/margins": 19.397686004638672,
"eval_rewards/real": -6.494665145874023,
"eval_runtime": 1785.6862,
"eval_samples_per_second": 2.418,
"eval_steps_per_second": 0.076,
"step": 650
},
{
"epoch": 1.09,
"learning_rate": 5.073260073260073e-08,
"logits/generated": -1.6706949472427368,
"logits/real": -1.8376855850219727,
"logps/generated": -927.1456298828125,
"logps/real": -447.51123046875,
"loss": 0.0031,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -27.89125633239746,
"rewards/margins": 21.775976181030273,
"rewards/real": -6.1152777671813965,
"step": 660
},
{
"epoch": 1.1,
"learning_rate": 4.981684981684982e-08,
"logits/generated": -1.6462090015411377,
"logits/real": -1.8371574878692627,
"logps/generated": -882.9945068359375,
"logps/real": -455.0235290527344,
"loss": 0.0076,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -25.57509994506836,
"rewards/margins": 19.432373046875,
"rewards/real": -6.142725944519043,
"step": 670
},
{
"epoch": 1.11,
"eval_logits/generated": -1.640711784362793,
"eval_logits/real": -1.8339245319366455,
"eval_logps/generated": -911.697021484375,
"eval_logps/real": -511.9859313964844,
"eval_loss": 0.01578509621322155,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -26.921693801879883,
"eval_rewards/margins": 20.067354202270508,
"eval_rewards/real": -6.854339599609375,
"eval_runtime": 1782.1365,
"eval_samples_per_second": 2.423,
"eval_steps_per_second": 0.076,
"step": 675
},
{
"epoch": 1.12,
"learning_rate": 4.8901098901098895e-08,
"logits/generated": -1.6573280096054077,
"logits/real": -1.885148286819458,
"logps/generated": -923.4420776367188,
"logps/real": -483.80621337890625,
"loss": 0.0057,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -28.48358154296875,
"rewards/margins": 21.583927154541016,
"rewards/real": -6.899654388427734,
"step": 680
},
{
"epoch": 1.14,
"learning_rate": 4.7985347985347985e-08,
"logits/generated": -1.6559168100357056,
"logits/real": -1.8097467422485352,
"logps/generated": -922.6828002929688,
"logps/real": -488.87664794921875,
"loss": 0.005,
"rewards/accuracies": 1.0,
"rewards/generated": -26.857372283935547,
"rewards/margins": 20.4537353515625,
"rewards/real": -6.403636932373047,
"step": 690
},
{
"epoch": 1.15,
"learning_rate": 4.706959706959707e-08,
"logits/generated": -1.6271326541900635,
"logits/real": -1.840662956237793,
"logps/generated": -901.0564575195312,
"logps/real": -491.31146240234375,
"loss": 0.004,
"rewards/accuracies": 1.0,
"rewards/generated": -27.303613662719727,
"rewards/margins": 20.737079620361328,
"rewards/real": -6.566534996032715,
"step": 700
},
{
"epoch": 1.15,
"eval_logits/generated": -1.6268597841262817,
"eval_logits/real": -1.8235687017440796,
"eval_logps/generated": -920.2236328125,
"eval_logps/real": -514.767822265625,
"eval_loss": 0.015848280861973763,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -27.77434730529785,
"eval_rewards/margins": 20.641807556152344,
"eval_rewards/real": -7.132537841796875,
"eval_runtime": 1781.0614,
"eval_samples_per_second": 2.424,
"eval_steps_per_second": 0.076,
"step": 700
},
{
"epoch": 1.17,
"learning_rate": 4.615384615384615e-08,
"logits/generated": -1.6111915111541748,
"logits/real": -1.8123859167099,
"logps/generated": -929.34326171875,
"logps/real": -504.7548828125,
"loss": 0.0041,
"rewards/accuracies": 1.0,
"rewards/generated": -27.69466781616211,
"rewards/margins": 20.481304168701172,
"rewards/real": -7.213364601135254,
"step": 710
},
{
"epoch": 1.19,
"learning_rate": 4.5238095238095236e-08,
"logits/generated": -1.6448142528533936,
"logits/real": -1.8344615697860718,
"logps/generated": -938.2579345703125,
"logps/real": -477.796630859375,
"loss": 0.0168,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -28.318180084228516,
"rewards/margins": 21.399818420410156,
"rewards/real": -6.918364524841309,
"step": 720
},
{
"epoch": 1.19,
"eval_logits/generated": -1.656567931175232,
"eval_logits/real": -1.8447872400283813,
"eval_logps/generated": -905.2711181640625,
"eval_logps/real": -512.4611206054688,
"eval_loss": 0.015721740201115608,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -26.27910041809082,
"eval_rewards/margins": 19.37723731994629,
"eval_rewards/real": -6.901863098144531,
"eval_runtime": 1781.2515,
"eval_samples_per_second": 2.424,
"eval_steps_per_second": 0.076,
"step": 725
},
{
"epoch": 1.2,
"learning_rate": 4.432234432234432e-08,
"logits/generated": -1.6806806325912476,
"logits/real": -1.909649133682251,
"logps/generated": -898.3150634765625,
"logps/real": -469.5130920410156,
"loss": 0.0053,
"rewards/accuracies": 1.0,
"rewards/generated": -25.730077743530273,
"rewards/margins": 19.823253631591797,
"rewards/real": -5.906826019287109,
"step": 730
},
{
"epoch": 1.22,
"learning_rate": 4.34065934065934e-08,
"logits/generated": -1.734819769859314,
"logits/real": -1.9214661121368408,
"logps/generated": -965.8170776367188,
"logps/real": -494.10760498046875,
"loss": 0.0053,
"rewards/accuracies": 1.0,
"rewards/generated": -27.31577491760254,
"rewards/margins": 20.62114906311035,
"rewards/real": -6.6946234703063965,
"step": 740
},
{
"epoch": 1.23,
"learning_rate": 4.2490842490842486e-08,
"logits/generated": -1.7267796993255615,
"logits/real": -1.929030179977417,
"logps/generated": -886.3689575195312,
"logps/real": -481.1717834472656,
"loss": 0.0022,
"rewards/accuracies": 1.0,
"rewards/generated": -24.81966781616211,
"rewards/margins": 18.82332992553711,
"rewards/real": -5.996334552764893,
"step": 750
},
{
"epoch": 1.23,
"eval_logits/generated": -1.6533170938491821,
"eval_logits/real": -1.8422995805740356,
"eval_logps/generated": -907.6251220703125,
"eval_logps/real": -513.0281372070312,
"eval_loss": 0.016253722831606865,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -26.514497756958008,
"eval_rewards/margins": 19.55593490600586,
"eval_rewards/real": -6.958561897277832,
"eval_runtime": 1784.393,
"eval_samples_per_second": 2.42,
"eval_steps_per_second": 0.076,
"step": 750
},
{
"epoch": 1.25,
"learning_rate": 4.1575091575091576e-08,
"logits/generated": -1.6823298931121826,
"logits/real": -1.8801406621932983,
"logps/generated": -974.8401489257812,
"logps/real": -505.0538635253906,
"loss": 0.0032,
"rewards/accuracies": 1.0,
"rewards/generated": -28.211589813232422,
"rewards/margins": 21.735652923583984,
"rewards/real": -6.4759368896484375,
"step": 760
},
{
"epoch": 1.27,
"learning_rate": 4.065934065934066e-08,
"logits/generated": -1.6613868474960327,
"logits/real": -1.8679672479629517,
"logps/generated": -923.1788330078125,
"logps/real": -530.2198486328125,
"loss": 0.0039,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -26.568191528320312,
"rewards/margins": 19.437541961669922,
"rewards/real": -7.130646705627441,
"step": 770
},
{
"epoch": 1.28,
"eval_logits/generated": -1.6368576288223267,
"eval_logits/real": -1.8327449560165405,
"eval_logps/generated": -924.7037963867188,
"eval_logps/real": -518.8289184570312,
"eval_loss": 0.0164579376578331,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -28.222370147705078,
"eval_rewards/margins": 20.6837215423584,
"eval_rewards/real": -7.538645267486572,
"eval_runtime": 1783.8825,
"eval_samples_per_second": 2.421,
"eval_steps_per_second": 0.076,
"step": 775
},
{
"epoch": 1.28,
"learning_rate": 3.9743589743589737e-08,
"logits/generated": -1.635840654373169,
"logits/real": -1.838230848312378,
"logps/generated": -982.92529296875,
"logps/real": -524.0345458984375,
"loss": 0.0073,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -28.60614585876465,
"rewards/margins": 21.019775390625,
"rewards/real": -7.586370944976807,
"step": 780
},
{
"epoch": 1.3,
"learning_rate": 3.8827838827838827e-08,
"logits/generated": -1.558452844619751,
"logits/real": -1.8335201740264893,
"logps/generated": -916.1337890625,
"logps/real": -474.8262634277344,
"loss": 0.0073,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -27.614761352539062,
"rewards/margins": 20.65240478515625,
"rewards/real": -6.9623517990112305,
"step": 790
},
{
"epoch": 1.32,
"learning_rate": 3.791208791208791e-08,
"logits/generated": -1.59113347530365,
"logits/real": -1.853981614112854,
"logps/generated": -958.3170166015625,
"logps/real": -483.369140625,
"loss": 0.002,
"rewards/accuracies": 1.0,
"rewards/generated": -30.300689697265625,
"rewards/margins": 23.347179412841797,
"rewards/real": -6.9535112380981445,
"step": 800
},
{
"epoch": 1.32,
"eval_logits/generated": -1.6365333795547485,
"eval_logits/real": -1.8344322443008423,
"eval_logps/generated": -928.9208374023438,
"eval_logps/real": -520.0109252929688,
"eval_loss": 0.016453638672828674,
"eval_rewards/accuracies": 0.9907407164573669,
"eval_rewards/generated": -28.644060134887695,
"eval_rewards/margins": 20.987220764160156,
"eval_rewards/real": -7.6568403244018555,
"eval_runtime": 1804.1661,
"eval_samples_per_second": 2.393,
"eval_steps_per_second": 0.075,
"step": 800
},
{
"epoch": 1.33,
"learning_rate": 3.6996336996336994e-08,
"logits/generated": -1.6491447687149048,
"logits/real": -1.8126541376113892,
"logps/generated": -940.8536376953125,
"logps/real": -472.17559814453125,
"loss": 0.0032,
"rewards/accuracies": 1.0,
"rewards/generated": -29.954341888427734,
"rewards/margins": 23.000102996826172,
"rewards/real": -6.954239845275879,
"step": 810
},
{
"epoch": 1.35,
"learning_rate": 3.608058608058608e-08,
"logits/generated": -1.6780191659927368,
"logits/real": -1.888399362564087,
"logps/generated": -910.19189453125,
"logps/real": -508.4088439941406,
"loss": 0.002,
"rewards/accuracies": 1.0,
"rewards/generated": -28.487747192382812,
"rewards/margins": 20.76497459411621,
"rewards/real": -7.722770690917969,
"step": 820
},
{
"epoch": 1.36,
"eval_logits/generated": -1.6348390579223633,
"eval_logits/real": -1.835233449935913,
"eval_logps/generated": -934.5078125,
"eval_logps/real": -521.4318237304688,
"eval_loss": 0.016549235209822655,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -29.202777862548828,
"eval_rewards/margins": 21.403844833374023,
"eval_rewards/real": -7.7989301681518555,
"eval_runtime": 1798.7077,
"eval_samples_per_second": 2.401,
"eval_steps_per_second": 0.075,
"step": 825
},
{
"epoch": 1.37,
"learning_rate": 3.516483516483517e-08,
"logits/generated": -1.6131916046142578,
"logits/real": -1.8359510898590088,
"logps/generated": -923.1203002929688,
"logps/real": -487.068359375,
"loss": 0.0039,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -29.090587615966797,
"rewards/margins": 21.882305145263672,
"rewards/real": -7.208279609680176,
"step": 830
},
{
"epoch": 1.38,
"learning_rate": 3.424908424908425e-08,
"logits/generated": -1.6657575368881226,
"logits/real": -1.8384662866592407,
"logps/generated": -914.0133056640625,
"logps/real": -461.9369201660156,
"loss": 0.0057,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -27.760547637939453,
"rewards/margins": 21.031766891479492,
"rewards/real": -6.7287774085998535,
"step": 840
},
{
"epoch": 1.4,
"learning_rate": 3.333333333333333e-08,
"logits/generated": -1.6930701732635498,
"logits/real": -1.907292366027832,
"logps/generated": -965.4385986328125,
"logps/real": -523.5211181640625,
"loss": 0.0019,
"rewards/accuracies": 1.0,
"rewards/generated": -30.622669219970703,
"rewards/margins": 22.505613327026367,
"rewards/real": -8.117053031921387,
"step": 850
},
{
"epoch": 1.4,
"eval_logits/generated": -1.6166415214538574,
"eval_logits/real": -1.8168882131576538,
"eval_logps/generated": -938.438232421875,
"eval_logps/real": -522.4202880859375,
"eval_loss": 0.016505062580108643,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -29.59580421447754,
"eval_rewards/margins": 21.69802474975586,
"eval_rewards/real": -7.897781848907471,
"eval_runtime": 1800.0221,
"eval_samples_per_second": 2.399,
"eval_steps_per_second": 0.075,
"step": 850
},
{
"epoch": 1.42,
"learning_rate": 3.241758241758242e-08,
"logits/generated": -1.605548620223999,
"logits/real": -1.788865089416504,
"logps/generated": -1042.3509521484375,
"logps/real": -527.3464965820312,
"loss": 0.0018,
"rewards/accuracies": 1.0,
"rewards/generated": -32.53047561645508,
"rewards/margins": 24.470928192138672,
"rewards/real": -8.059545516967773,
"step": 860
},
{
"epoch": 1.43,
"learning_rate": 3.15018315018315e-08,
"logits/generated": -1.6185451745986938,
"logits/real": -1.8139030933380127,
"logps/generated": -965.8375854492188,
"logps/real": -508.0126953125,
"loss": 0.0041,
"rewards/accuracies": 1.0,
"rewards/generated": -30.107463836669922,
"rewards/margins": 22.463966369628906,
"rewards/real": -7.643497467041016,
"step": 870
},
{
"epoch": 1.44,
"eval_logits/generated": -1.61648428440094,
"eval_logits/real": -1.81755793094635,
"eval_logps/generated": -940.4099731445312,
"eval_logps/real": -523.1380004882812,
"eval_loss": 0.016207309439778328,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -29.792985916137695,
"eval_rewards/margins": 21.823434829711914,
"eval_rewards/real": -7.969552993774414,
"eval_runtime": 1801.8606,
"eval_samples_per_second": 2.396,
"eval_steps_per_second": 0.075,
"step": 875
},
{
"epoch": 1.45,
"learning_rate": 3.0586080586080584e-08,
"logits/generated": -1.56507408618927,
"logits/real": -1.8616406917572021,
"logps/generated": -976.12548828125,
"logps/real": -496.3408203125,
"loss": 0.0063,
"rewards/accuracies": 1.0,
"rewards/generated": -31.310409545898438,
"rewards/margins": 23.59577178955078,
"rewards/real": -7.714636325836182,
"step": 880
},
{
"epoch": 1.47,
"learning_rate": 2.9670329670329668e-08,
"logits/generated": -1.5671743154525757,
"logits/real": -1.733432412147522,
"logps/generated": -920.916015625,
"logps/real": -522.0253295898438,
"loss": 0.0071,
"rewards/accuracies": 1.0,
"rewards/generated": -28.688098907470703,
"rewards/margins": 20.804473876953125,
"rewards/real": -7.883625030517578,
"step": 890
},
{
"epoch": 1.48,
"learning_rate": 2.875457875457875e-08,
"logits/generated": -1.6324392557144165,
"logits/real": -1.8402057886123657,
"logps/generated": -949.7025146484375,
"logps/real": -491.1188049316406,
"loss": 0.0023,
"rewards/accuracies": 1.0,
"rewards/generated": -31.04391098022461,
"rewards/margins": 23.250308990478516,
"rewards/real": -7.793595790863037,
"step": 900
},
{
"epoch": 1.48,
"eval_logits/generated": -1.6044836044311523,
"eval_logits/real": -1.809339165687561,
"eval_logps/generated": -949.38916015625,
"eval_logps/real": -525.528564453125,
"eval_loss": 0.01638590730726719,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -30.69091033935547,
"eval_rewards/margins": 22.482301712036133,
"eval_rewards/real": -8.208609580993652,
"eval_runtime": 1798.0882,
"eval_samples_per_second": 2.401,
"eval_steps_per_second": 0.075,
"step": 900
},
{
"epoch": 1.5,
"learning_rate": 2.7838827838827838e-08,
"logits/generated": -1.596328854560852,
"logits/real": -1.8236808776855469,
"logps/generated": -962.3810424804688,
"logps/real": -511.0006408691406,
"loss": 0.0061,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -31.28286361694336,
"rewards/margins": 23.244314193725586,
"rewards/real": -8.038549423217773,
"step": 910
},
{
"epoch": 1.51,
"learning_rate": 2.692307692307692e-08,
"logits/generated": -1.608758568763733,
"logits/real": -1.8891884088516235,
"logps/generated": -1000.3331298828125,
"logps/real": -517.9403076171875,
"loss": 0.0038,
"rewards/accuracies": 1.0,
"rewards/generated": -32.935813903808594,
"rewards/margins": 25.521175384521484,
"rewards/real": -7.414637565612793,
"step": 920
},
{
"epoch": 1.52,
"eval_logits/generated": -1.591917634010315,
"eval_logits/real": -1.7978274822235107,
"eval_logps/generated": -949.2075805664062,
"eval_logps/real": -524.6597290039062,
"eval_loss": 0.016565019264817238,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -30.672739028930664,
"eval_rewards/margins": 22.55101776123047,
"eval_rewards/real": -8.121725082397461,
"eval_runtime": 1802.0893,
"eval_samples_per_second": 2.396,
"eval_steps_per_second": 0.075,
"step": 925
},
{
"epoch": 1.53,
"learning_rate": 2.600732600732601e-08,
"logits/generated": -1.537630319595337,
"logits/real": -1.7378448247909546,
"logps/generated": -968.7054443359375,
"logps/real": -489.5370178222656,
"loss": 0.0073,
"rewards/accuracies": 1.0,
"rewards/generated": -32.376976013183594,
"rewards/margins": 24.455623626708984,
"rewards/real": -7.921347141265869,
"step": 930
},
{
"epoch": 1.55,
"learning_rate": 2.509157509157509e-08,
"logits/generated": -1.604174017906189,
"logits/real": -1.8114948272705078,
"logps/generated": -937.0480346679688,
"logps/real": -495.5521545410156,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/generated": -30.041767120361328,
"rewards/margins": 22.23421859741211,
"rewards/real": -7.807549953460693,
"step": 940
},
{
"epoch": 1.56,
"learning_rate": 2.4175824175824175e-08,
"logits/generated": -1.5735671520233154,
"logits/real": -1.7788879871368408,
"logps/generated": -938.4112548828125,
"logps/real": -486.3994140625,
"loss": 0.0096,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -29.847030639648438,
"rewards/margins": 22.341915130615234,
"rewards/real": -7.505116939544678,
"step": 950
},
{
"epoch": 1.56,
"eval_logits/generated": -1.5908763408660889,
"eval_logits/real": -1.7955536842346191,
"eval_logps/generated": -943.6237182617188,
"eval_logps/real": -521.6991577148438,
"eval_loss": 0.016153085976839066,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -30.114360809326172,
"eval_rewards/margins": 22.2886962890625,
"eval_rewards/real": -7.825665473937988,
"eval_runtime": 1801.6388,
"eval_samples_per_second": 2.397,
"eval_steps_per_second": 0.075,
"step": 950
},
{
"epoch": 1.58,
"learning_rate": 2.326007326007326e-08,
"logits/generated": -1.5542490482330322,
"logits/real": -1.7995363473892212,
"logps/generated": -1011.2404174804688,
"logps/real": -517.3983764648438,
"loss": 0.0043,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -32.60115432739258,
"rewards/margins": 24.621551513671875,
"rewards/real": -7.979601860046387,
"step": 960
},
{
"epoch": 1.6,
"learning_rate": 2.2344322344322346e-08,
"logits/generated": -1.5683870315551758,
"logits/real": -1.7601861953735352,
"logps/generated": -916.2017822265625,
"logps/real": -493.31494140625,
"loss": 0.0057,
"rewards/accuracies": 1.0,
"rewards/generated": -29.836261749267578,
"rewards/margins": 22.397926330566406,
"rewards/real": -7.438332557678223,
"step": 970
},
{
"epoch": 1.6,
"eval_logits/generated": -1.5854144096374512,
"eval_logits/real": -1.7919222116470337,
"eval_logps/generated": -949.1341552734375,
"eval_logps/real": -523.7774658203125,
"eval_loss": 0.0166173093020916,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -30.665393829345703,
"eval_rewards/margins": 22.631893157958984,
"eval_rewards/real": -8.033498764038086,
"eval_runtime": 1798.076,
"eval_samples_per_second": 2.401,
"eval_steps_per_second": 0.075,
"step": 975
},
{
"epoch": 1.61,
"learning_rate": 2.1428571428571426e-08,
"logits/generated": -1.6264305114746094,
"logits/real": -1.851205825805664,
"logps/generated": -929.2615356445312,
"logps/real": -481.2755432128906,
"loss": 0.0019,
"rewards/accuracies": 1.0,
"rewards/generated": -30.502222061157227,
"rewards/margins": 23.29401206970215,
"rewards/real": -7.2082085609436035,
"step": 980
},
{
"epoch": 1.63,
"learning_rate": 2.0512820512820512e-08,
"logits/generated": -1.6349788904190063,
"logits/real": -1.8103811740875244,
"logps/generated": -930.9240112304688,
"logps/real": -486.3633728027344,
"loss": 0.002,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -28.78774642944336,
"rewards/margins": 21.5825138092041,
"rewards/real": -7.205234527587891,
"step": 990
},
{
"epoch": 1.65,
"learning_rate": 1.9597069597069596e-08,
"logits/generated": -1.5818378925323486,
"logits/real": -1.8051410913467407,
"logps/generated": -994.4307861328125,
"logps/real": -506.43048095703125,
"loss": 0.0046,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -32.12641143798828,
"rewards/margins": 24.237791061401367,
"rewards/real": -7.888618469238281,
"step": 1000
},
{
"epoch": 1.65,
"eval_logits/generated": -1.5768269300460815,
"eval_logits/real": -1.7851576805114746,
"eval_logps/generated": -952.6190795898438,
"eval_logps/real": -525.199951171875,
"eval_loss": 0.016495853662490845,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -31.013896942138672,
"eval_rewards/margins": 22.838150024414062,
"eval_rewards/real": -8.175748825073242,
"eval_runtime": 1801.7659,
"eval_samples_per_second": 2.397,
"eval_steps_per_second": 0.075,
"step": 1000
},
{
"epoch": 1.66,
"learning_rate": 1.868131868131868e-08,
"logits/generated": -1.5746369361877441,
"logits/real": -1.8138281106948853,
"logps/generated": -935.8997192382812,
"logps/real": -524.5531616210938,
"loss": 0.0043,
"rewards/accuracies": 1.0,
"rewards/generated": -29.7835750579834,
"rewards/margins": 22.015628814697266,
"rewards/real": -7.767943382263184,
"step": 1010
},
{
"epoch": 1.68,
"learning_rate": 1.7765567765567766e-08,
"logits/generated": -1.617248296737671,
"logits/real": -1.8158845901489258,
"logps/generated": -975.7345581054688,
"logps/real": -520.7113037109375,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/generated": -31.075618743896484,
"rewards/margins": 23.180437088012695,
"rewards/real": -7.895182132720947,
"step": 1020
},
{
"epoch": 1.69,
"eval_logits/generated": -1.5756635665893555,
"eval_logits/real": -1.7830266952514648,
"eval_logps/generated": -950.0453491210938,
"eval_logps/real": -523.9951171875,
"eval_loss": 0.01654692552983761,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -30.75650978088379,
"eval_rewards/margins": 22.701244354248047,
"eval_rewards/real": -8.055268287658691,
"eval_runtime": 1788.1081,
"eval_samples_per_second": 2.415,
"eval_steps_per_second": 0.075,
"step": 1025
},
{
"epoch": 1.7,
"learning_rate": 1.684981684981685e-08,
"logits/generated": -1.591524362564087,
"logits/real": -1.7995145320892334,
"logps/generated": -919.4166259765625,
"logps/real": -488.1844787597656,
"loss": 0.0037,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -29.463176727294922,
"rewards/margins": 22.12551498413086,
"rewards/real": -7.337666988372803,
"step": 1030
},
{
"epoch": 1.71,
"learning_rate": 1.5934065934065933e-08,
"logits/generated": -1.5764684677124023,
"logits/real": -1.7932662963867188,
"logps/generated": -967.7091064453125,
"logps/real": -521.2521362304688,
"loss": 0.0034,
"rewards/accuracies": 1.0,
"rewards/generated": -31.743621826171875,
"rewards/margins": 24.00382423400879,
"rewards/real": -7.739800453186035,
"step": 1040
},
{
"epoch": 1.73,
"learning_rate": 1.5018315018315017e-08,
"logits/generated": -1.5970559120178223,
"logits/real": -1.8183799982070923,
"logps/generated": -954.7509765625,
"logps/real": -505.82757568359375,
"loss": 0.002,
"rewards/accuracies": 1.0,
"rewards/generated": -31.225833892822266,
"rewards/margins": 23.92045021057129,
"rewards/real": -7.305386543273926,
"step": 1050
},
{
"epoch": 1.73,
"eval_logits/generated": -1.5691884756088257,
"eval_logits/real": -1.7789667844772339,
"eval_logps/generated": -955.8453369140625,
"eval_logps/real": -525.2799682617188,
"eval_loss": 0.01644195057451725,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -31.336515426635742,
"eval_rewards/margins": 23.152767181396484,
"eval_rewards/real": -8.18375015258789,
"eval_runtime": 1807.2715,
"eval_samples_per_second": 2.389,
"eval_steps_per_second": 0.075,
"step": 1050
},
{
"epoch": 1.74,
"learning_rate": 1.4102564102564102e-08,
"logits/generated": -1.5513131618499756,
"logits/real": -1.7797822952270508,
"logps/generated": -938.0900268554688,
"logps/real": -503.44921875,
"loss": 0.0041,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -31.36539649963379,
"rewards/margins": 23.386436462402344,
"rewards/real": -7.978959083557129,
"step": 1060
},
{
"epoch": 1.76,
"learning_rate": 1.3186813186813187e-08,
"logits/generated": -1.6085302829742432,
"logits/real": -1.7643792629241943,
"logps/generated": -978.6404418945312,
"logps/real": -512.4838256835938,
"loss": 0.0069,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -31.299551010131836,
"rewards/margins": 23.147233963012695,
"rewards/real": -8.152318000793457,
"step": 1070
},
{
"epoch": 1.77,
"eval_logits/generated": -1.57485032081604,
"eval_logits/real": -1.7849942445755005,
"eval_logps/generated": -956.59814453125,
"eval_logps/real": -525.350830078125,
"eval_loss": 0.01633109152317047,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -31.411802291870117,
"eval_rewards/margins": 23.220966339111328,
"eval_rewards/real": -8.190834999084473,
"eval_runtime": 1798.0611,
"eval_samples_per_second": 2.401,
"eval_steps_per_second": 0.075,
"step": 1075
},
{
"epoch": 1.78,
"learning_rate": 1.227106227106227e-08,
"logits/generated": -1.548825979232788,
"logits/real": -1.7612594366073608,
"logps/generated": -1017.5808715820312,
"logps/real": -520.19384765625,
"loss": 0.0018,
"rewards/accuracies": 1.0,
"rewards/generated": -33.57468795776367,
"rewards/margins": 25.4267520904541,
"rewards/real": -8.147936820983887,
"step": 1080
},
{
"epoch": 1.79,
"learning_rate": 1.1355311355311355e-08,
"logits/generated": -1.556921362876892,
"logits/real": -1.7643944025039673,
"logps/generated": -938.5661010742188,
"logps/real": -503.1206970214844,
"loss": 0.0034,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -29.682641983032227,
"rewards/margins": 21.84661102294922,
"rewards/real": -7.83603048324585,
"step": 1090
},
{
"epoch": 1.81,
"learning_rate": 1.0439560439560439e-08,
"logits/generated": -1.5458358526229858,
"logits/real": -1.7758142948150635,
"logps/generated": -942.8790893554688,
"logps/real": -487.7559509277344,
"loss": 0.0029,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -31.93533706665039,
"rewards/margins": 24.24590492248535,
"rewards/real": -7.689431667327881,
"step": 1100
},
{
"epoch": 1.81,
"eval_logits/generated": -1.5624111890792847,
"eval_logits/real": -1.7751930952072144,
"eval_logps/generated": -963.309814453125,
"eval_logps/real": -527.5802001953125,
"eval_loss": 0.016566824167966843,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -32.08296585083008,
"eval_rewards/margins": 23.669187545776367,
"eval_rewards/real": -8.413775444030762,
"eval_runtime": 1800.7952,
"eval_samples_per_second": 2.398,
"eval_steps_per_second": 0.075,
"step": 1100
},
{
"epoch": 1.83,
"learning_rate": 9.523809523809522e-09,
"logits/generated": -1.5966811180114746,
"logits/real": -1.791329026222229,
"logps/generated": -976.7927856445312,
"logps/real": -493.8138122558594,
"loss": 0.0047,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -32.71385955810547,
"rewards/margins": 24.73776626586914,
"rewards/real": -7.9760942459106445,
"step": 1110
},
{
"epoch": 1.84,
"learning_rate": 8.608058608058607e-09,
"logits/generated": -1.5438224077224731,
"logits/real": -1.7942355871200562,
"logps/generated": -1013.4166870117188,
"logps/real": -529.5133056640625,
"loss": 0.0047,
"rewards/accuracies": 1.0,
"rewards/generated": -34.10750961303711,
"rewards/margins": 25.782058715820312,
"rewards/real": -8.325451850891113,
"step": 1120
},
{
"epoch": 1.85,
"eval_logits/generated": -1.5631078481674194,
"eval_logits/real": -1.775943398475647,
"eval_logps/generated": -964.0065307617188,
"eval_logps/real": -527.6651611328125,
"eval_loss": 0.016596974804997444,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -32.15264129638672,
"eval_rewards/margins": 23.730371475219727,
"eval_rewards/real": -8.422268867492676,
"eval_runtime": 1805.5605,
"eval_samples_per_second": 2.392,
"eval_steps_per_second": 0.075,
"step": 1125
},
{
"epoch": 1.86,
"learning_rate": 7.692307692307693e-09,
"logits/generated": -1.6013424396514893,
"logits/real": -1.8322757482528687,
"logps/generated": -1004.1285400390625,
"logps/real": -520.826416015625,
"loss": 0.0019,
"rewards/accuracies": 1.0,
"rewards/generated": -34.476234436035156,
"rewards/margins": 25.89908218383789,
"rewards/real": -8.577150344848633,
"step": 1130
},
{
"epoch": 1.88,
"learning_rate": 6.776556776556776e-09,
"logits/generated": -1.6247609853744507,
"logits/real": -1.8308923244476318,
"logps/generated": -974.4166870117188,
"logps/real": -467.469970703125,
"loss": 0.0065,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -33.500511169433594,
"rewards/margins": 25.762847900390625,
"rewards/real": -7.737664699554443,
"step": 1140
},
{
"epoch": 1.89,
"learning_rate": 5.86080586080586e-09,
"logits/generated": -1.5782761573791504,
"logits/real": -1.8114595413208008,
"logps/generated": -984.72265625,
"logps/real": -521.5853881835938,
"loss": 0.0037,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -32.02996826171875,
"rewards/margins": 24.080408096313477,
"rewards/real": -7.949559211730957,
"step": 1150
},
{
"epoch": 1.89,
"eval_logits/generated": -1.573925256729126,
"eval_logits/real": -1.7831730842590332,
"eval_logps/generated": -955.689453125,
"eval_logps/real": -525.0056762695312,
"eval_loss": 0.016293587163090706,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -31.32093048095703,
"eval_rewards/margins": 23.164613723754883,
"eval_rewards/real": -8.156318664550781,
"eval_runtime": 1805.9186,
"eval_samples_per_second": 2.391,
"eval_steps_per_second": 0.075,
"step": 1150
},
{
"epoch": 1.91,
"learning_rate": 4.945054945054945e-09,
"logits/generated": -1.5925065279006958,
"logits/real": -1.8153518438339233,
"logps/generated": -977.4358520507812,
"logps/real": -493.5396423339844,
"loss": 0.0027,
"rewards/accuracies": 1.0,
"rewards/generated": -32.501895904541016,
"rewards/margins": 24.7786865234375,
"rewards/real": -7.723211765289307,
"step": 1160
},
{
"epoch": 1.93,
"learning_rate": 4.02930402930403e-09,
"logits/generated": -1.5815564393997192,
"logits/real": -1.7986618280410767,
"logps/generated": -984.4176025390625,
"logps/real": -496.143310546875,
"loss": 0.0026,
"rewards/accuracies": 1.0,
"rewards/generated": -30.3635311126709,
"rewards/margins": 22.687597274780273,
"rewards/real": -7.675933837890625,
"step": 1170
},
{
"epoch": 1.93,
"eval_logits/generated": -1.5708197355270386,
"eval_logits/real": -1.7807316780090332,
"eval_logps/generated": -957.48876953125,
"eval_logps/real": -525.5498046875,
"eval_loss": 0.016291461884975433,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -31.50086784362793,
"eval_rewards/margins": 23.290132522583008,
"eval_rewards/real": -8.210736274719238,
"eval_runtime": 1806.1638,
"eval_samples_per_second": 2.391,
"eval_steps_per_second": 0.075,
"step": 1175
},
{
"epoch": 1.94,
"learning_rate": 3.1135531135531137e-09,
"logits/generated": -1.6078064441680908,
"logits/real": -1.8417888879776,
"logps/generated": -980.16552734375,
"logps/real": -481.13250732421875,
"loss": 0.0044,
"rewards/accuracies": 1.0,
"rewards/generated": -33.39889144897461,
"rewards/margins": 25.546215057373047,
"rewards/real": -7.852681636810303,
"step": 1180
},
{
"epoch": 1.96,
"learning_rate": 2.197802197802198e-09,
"logits/generated": -1.5867105722427368,
"logits/real": -1.8165124654769897,
"logps/generated": -991.1232299804688,
"logps/real": -548.1749267578125,
"loss": 0.0044,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -30.872753143310547,
"rewards/margins": 22.4478759765625,
"rewards/real": -8.424878120422363,
"step": 1190
},
{
"epoch": 1.98,
"learning_rate": 1.282051282051282e-09,
"logits/generated": -1.5666377544403076,
"logits/real": -1.8013808727264404,
"logps/generated": -938.0455932617188,
"logps/real": -504.17022705078125,
"loss": 0.0058,
"rewards/accuracies": 0.981249988079071,
"rewards/generated": -30.902517318725586,
"rewards/margins": 23.061681747436523,
"rewards/real": -7.840832710266113,
"step": 1200
},
{
"epoch": 1.98,
"eval_logits/generated": -1.5719400644302368,
"eval_logits/real": -1.7812800407409668,
"eval_logps/generated": -956.3063354492188,
"eval_logps/real": -525.1734619140625,
"eval_loss": 0.01621842570602894,
"eval_rewards/accuracies": 0.9916666746139526,
"eval_rewards/generated": -31.38262367248535,
"eval_rewards/margins": 23.20952606201172,
"eval_rewards/real": -8.173093795776367,
"eval_runtime": 1803.2665,
"eval_samples_per_second": 2.395,
"eval_steps_per_second": 0.075,
"step": 1200
},
{
"epoch": 1.99,
"learning_rate": 3.6630036630036627e-10,
"logits/generated": -1.5853986740112305,
"logits/real": -1.8437074422836304,
"logps/generated": -930.419921875,
"logps/real": -524.40087890625,
"loss": 0.0047,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -30.547359466552734,
"rewards/margins": 22.564682006835938,
"rewards/real": -7.982677459716797,
"step": 1210
},
{
"epoch": 2.0,
"step": 1214,
"total_flos": 0.0,
"train_loss": 0.03410133493748145,
"train_runtime": 146707.6169,
"train_samples_per_second": 0.53,
"train_steps_per_second": 0.008
}
],
"logging_steps": 10,
"max_steps": 1214,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}