|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9983539094650205, |
|
"eval_steps": 25, |
|
"global_step": 1214, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.196721311475409e-10, |
|
"logits/generated": -2.0642459392547607, |
|
"logits/real": -2.1011667251586914, |
|
"logps/generated": -767.111328125, |
|
"logps/real": -424.18878173828125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.196721311475408e-09, |
|
"logits/generated": -2.0099620819091797, |
|
"logits/real": -2.1245546340942383, |
|
"logps/generated": -645.1455688476562, |
|
"logps/real": -425.1603698730469, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/generated": -0.022459693253040314, |
|
"rewards/margins": 0.015254557132720947, |
|
"rewards/real": -0.007205137051641941, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6393442622950816e-08, |
|
"logits/generated": -2.017244338989258, |
|
"logits/real": -2.1224846839904785, |
|
"logps/generated": -626.304443359375, |
|
"logps/real": -415.47955322265625, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/generated": -0.31143561005592346, |
|
"rewards/margins": 0.1948009729385376, |
|
"rewards/real": -0.11663466691970825, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_logits/generated": -2.0004239082336426, |
|
"eval_logits/real": -2.0772550106048584, |
|
"eval_logps/generated": -656.7918701171875, |
|
"eval_logps/real": -449.63409423828125, |
|
"eval_loss": 0.4146920442581177, |
|
"eval_rewards/accuracies": 0.925000011920929, |
|
"eval_rewards/generated": -1.4311684370040894, |
|
"eval_rewards/margins": 0.8120061159133911, |
|
"eval_rewards/real": -0.6191622018814087, |
|
"eval_runtime": 1777.8699, |
|
"eval_samples_per_second": 2.429, |
|
"eval_steps_per_second": 0.076, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.459016393442623e-08, |
|
"logits/generated": -2.018745183944702, |
|
"logits/real": -2.120075225830078, |
|
"logps/generated": -681.8663940429688, |
|
"logps/real": -443.0894470214844, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -1.3025258779525757, |
|
"rewards/margins": 0.753677487373352, |
|
"rewards/real": -0.5488484501838684, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.278688524590163e-08, |
|
"logits/generated": -2.003990411758423, |
|
"logits/real": -2.0808629989624023, |
|
"logps/generated": -633.3893432617188, |
|
"logps/real": -416.35333251953125, |
|
"loss": 0.3191, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.1262094974517822, |
|
"rewards/margins": 1.220593810081482, |
|
"rewards/real": -0.9056156277656555, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0983606557377046e-08, |
|
"logits/generated": -1.9361705780029297, |
|
"logits/real": -2.0647199153900146, |
|
"logps/generated": -706.2001342773438, |
|
"logps/real": -468.58807373046875, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -3.8283188343048096, |
|
"rewards/margins": 2.2455239295959473, |
|
"rewards/real": -1.582794427871704, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/generated": -1.9305709600448608, |
|
"eval_logits/real": -2.0236809253692627, |
|
"eval_logps/generated": -692.5404052734375, |
|
"eval_logps/real": -463.74224853515625, |
|
"eval_loss": 0.1745266169309616, |
|
"eval_rewards/accuracies": 0.9518518447875977, |
|
"eval_rewards/generated": -5.006031036376953, |
|
"eval_rewards/margins": 2.9760546684265137, |
|
"eval_rewards/real": -2.029975652694702, |
|
"eval_runtime": 1800.9154, |
|
"eval_samples_per_second": 2.398, |
|
"eval_steps_per_second": 0.075, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.918032786885246e-08, |
|
"logits/generated": -1.953768014907837, |
|
"logits/real": -2.022987127304077, |
|
"logps/generated": -717.5247192382812, |
|
"logps/real": -424.16162109375, |
|
"loss": 0.1354, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.01259708404541, |
|
"rewards/margins": 3.8680121898651123, |
|
"rewards/real": -2.144585371017456, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5.7377049180327866e-08, |
|
"logits/generated": -1.8369897603988647, |
|
"logits/real": -1.9798635244369507, |
|
"logps/generated": -716.9135131835938, |
|
"logps/real": -459.76275634765625, |
|
"loss": 0.1292, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.8557868003845215, |
|
"rewards/margins": 4.214533805847168, |
|
"rewards/real": -2.641252040863037, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_logits/generated": -1.884318470954895, |
|
"eval_logits/real": -1.9886623620986938, |
|
"eval_logps/generated": -717.4470825195312, |
|
"eval_logps/real": -471.66973876953125, |
|
"eval_loss": 0.10119830071926117, |
|
"eval_rewards/accuracies": 0.9685184955596924, |
|
"eval_rewards/generated": -7.496694087982178, |
|
"eval_rewards/margins": 4.673972129821777, |
|
"eval_rewards/real": -2.8227217197418213, |
|
"eval_runtime": 1800.5623, |
|
"eval_samples_per_second": 2.398, |
|
"eval_steps_per_second": 0.075, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.557377049180327e-08, |
|
"logits/generated": -1.8934190273284912, |
|
"logits/real": -2.0053441524505615, |
|
"logps/generated": -735.2626953125, |
|
"logps/real": -467.66961669921875, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.727712154388428, |
|
"rewards/margins": 4.887805461883545, |
|
"rewards/real": -2.8399062156677246, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.377049180327869e-08, |
|
"logits/generated": -1.8700984716415405, |
|
"logits/real": -1.931532859802246, |
|
"logps/generated": -777.4097900390625, |
|
"logps/real": -457.1133728027344, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -9.006689071655273, |
|
"rewards/margins": 5.990359306335449, |
|
"rewards/real": -3.0163300037384033, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.196721311475409e-08, |
|
"logits/generated": -1.8540890216827393, |
|
"logits/real": -1.952444076538086, |
|
"logps/generated": -760.2003784179688, |
|
"logps/real": -471.71221923828125, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -8.933283805847168, |
|
"rewards/margins": 5.90076208114624, |
|
"rewards/real": -3.0325207710266113, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/generated": -1.8507987260818481, |
|
"eval_logits/real": -1.9628313779830933, |
|
"eval_logps/generated": -735.65673828125, |
|
"eval_logps/real": -476.3786315917969, |
|
"eval_loss": 0.0675550326704979, |
|
"eval_rewards/accuracies": 0.9777777791023254, |
|
"eval_rewards/generated": -9.317663192749023, |
|
"eval_rewards/margins": 6.0240478515625, |
|
"eval_rewards/real": -3.2936155796051025, |
|
"eval_runtime": 1798.5965, |
|
"eval_samples_per_second": 2.401, |
|
"eval_steps_per_second": 0.075, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.01639344262295e-08, |
|
"logits/generated": -1.7943336963653564, |
|
"logits/real": -1.9300905466079712, |
|
"logps/generated": -762.0491943359375, |
|
"logps/real": -434.4507751464844, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.933004379272461, |
|
"rewards/margins": 6.8074140548706055, |
|
"rewards/real": -3.1255910396575928, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.836065573770492e-08, |
|
"logits/generated": -1.8353208303451538, |
|
"logits/real": -1.9718765020370483, |
|
"logps/generated": -750.9710693359375, |
|
"logps/real": -449.0281677246094, |
|
"loss": 0.0429, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -10.487658500671387, |
|
"rewards/margins": 7.143439292907715, |
|
"rewards/real": -3.3442184925079346, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/generated": -1.8123193979263306, |
|
"eval_logits/real": -1.9332078695297241, |
|
"eval_logps/generated": -755.2024536132812, |
|
"eval_logps/real": -480.7701110839844, |
|
"eval_loss": 0.04767724126577377, |
|
"eval_rewards/accuracies": 0.9824073910713196, |
|
"eval_rewards/generated": -11.27223014831543, |
|
"eval_rewards/margins": 7.53946590423584, |
|
"eval_rewards/real": -3.73276424407959, |
|
"eval_runtime": 1803.3715, |
|
"eval_samples_per_second": 2.394, |
|
"eval_steps_per_second": 0.075, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.926739926739926e-08, |
|
"logits/generated": -1.8151371479034424, |
|
"logits/real": -1.9583898782730103, |
|
"logps/generated": -810.3426513671875, |
|
"logps/real": -501.2919921875, |
|
"loss": 0.0431, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.520541191101074, |
|
"rewards/margins": 7.8352227210998535, |
|
"rewards/real": -3.6853184700012207, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.835164835164835e-08, |
|
"logits/generated": -1.8159958124160767, |
|
"logits/real": -1.9128528833389282, |
|
"logps/generated": -802.4890747070312, |
|
"logps/real": -464.2137756347656, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -12.06971263885498, |
|
"rewards/margins": 8.350500106811523, |
|
"rewards/real": -3.719212055206299, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.743589743589743e-08, |
|
"logits/generated": -1.754547119140625, |
|
"logits/real": -1.9312493801116943, |
|
"logps/generated": -814.783935546875, |
|
"logps/real": -467.99609375, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -13.577325820922852, |
|
"rewards/margins": 9.742910385131836, |
|
"rewards/real": -3.83441424369812, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_logits/generated": -1.7938494682312012, |
|
"eval_logits/real": -1.9225581884384155, |
|
"eval_logps/generated": -775.0787353515625, |
|
"eval_logps/real": -485.6038818359375, |
|
"eval_loss": 0.036931850016117096, |
|
"eval_rewards/accuracies": 0.9870370626449585, |
|
"eval_rewards/generated": -13.259866714477539, |
|
"eval_rewards/margins": 9.043731689453125, |
|
"eval_rewards/real": -4.216136932373047, |
|
"eval_runtime": 1778.5818, |
|
"eval_samples_per_second": 2.428, |
|
"eval_steps_per_second": 0.076, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.652014652014652e-08, |
|
"logits/generated": -1.7989473342895508, |
|
"logits/real": -1.9711263179779053, |
|
"logps/generated": -816.1602783203125, |
|
"logps/real": -429.8924865722656, |
|
"loss": 0.0275, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -14.095451354980469, |
|
"rewards/margins": 10.274964332580566, |
|
"rewards/real": -3.820486545562744, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.56043956043956e-08, |
|
"logits/generated": -1.8312492370605469, |
|
"logits/real": -1.9516799449920654, |
|
"logps/generated": -803.2518920898438, |
|
"logps/real": -467.13983154296875, |
|
"loss": 0.0252, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -13.788885116577148, |
|
"rewards/margins": 9.561357498168945, |
|
"rewards/real": -4.227527618408203, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/generated": -1.7758067846298218, |
|
"eval_logits/real": -1.9115736484527588, |
|
"eval_logps/generated": -792.9690551757812, |
|
"eval_logps/real": -490.6431579589844, |
|
"eval_loss": 0.03204120323061943, |
|
"eval_rewards/accuracies": 0.9879629611968994, |
|
"eval_rewards/generated": -15.048893928527832, |
|
"eval_rewards/margins": 10.328824043273926, |
|
"eval_rewards/real": -4.720070838928223, |
|
"eval_runtime": 1779.5528, |
|
"eval_samples_per_second": 2.426, |
|
"eval_steps_per_second": 0.076, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.468864468864468e-08, |
|
"logits/generated": -1.8363538980484009, |
|
"logits/real": -1.9760059118270874, |
|
"logps/generated": -798.1632690429688, |
|
"logps/real": -465.4309997558594, |
|
"loss": 0.018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.945306777954102, |
|
"rewards/margins": 9.537097930908203, |
|
"rewards/real": -4.40820837020874, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.377289377289377e-08, |
|
"logits/generated": -1.7771434783935547, |
|
"logits/real": -1.918859839439392, |
|
"logps/generated": -778.4981689453125, |
|
"logps/real": -445.9978942871094, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -15.705945014953613, |
|
"rewards/margins": 11.227587699890137, |
|
"rewards/real": -4.478354454040527, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.285714285714286e-08, |
|
"logits/generated": -1.7843818664550781, |
|
"logits/real": -1.9129893779754639, |
|
"logps/generated": -816.643310546875, |
|
"logps/real": -449.44171142578125, |
|
"loss": 0.0249, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -16.523761749267578, |
|
"rewards/margins": 11.881709098815918, |
|
"rewards/real": -4.642051696777344, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_logits/generated": -1.7515002489089966, |
|
"eval_logits/real": -1.8923099040985107, |
|
"eval_logps/generated": -806.0497436523438, |
|
"eval_logps/real": -494.1994934082031, |
|
"eval_loss": 0.030071575194597244, |
|
"eval_rewards/accuracies": 0.9879629611968994, |
|
"eval_rewards/generated": -16.356964111328125, |
|
"eval_rewards/margins": 11.28126049041748, |
|
"eval_rewards/real": -5.0757036209106445, |
|
"eval_runtime": 1798.4488, |
|
"eval_samples_per_second": 2.401, |
|
"eval_steps_per_second": 0.075, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.194139194139193e-08, |
|
"logits/generated": -1.7697616815567017, |
|
"logits/real": -1.9165000915527344, |
|
"logps/generated": -853.5462646484375, |
|
"logps/real": -462.77484130859375, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.052305221557617, |
|
"rewards/margins": 13.306139945983887, |
|
"rewards/real": -4.746166229248047, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.102564102564102e-08, |
|
"logits/generated": -1.7060960531234741, |
|
"logits/real": -1.8868176937103271, |
|
"logps/generated": -852.2977294921875, |
|
"logps/real": -465.27099609375, |
|
"loss": 0.0175, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.251428604125977, |
|
"rewards/margins": 14.104260444641113, |
|
"rewards/real": -5.1471662521362305, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/generated": -1.7361782789230347, |
|
"eval_logits/real": -1.8820877075195312, |
|
"eval_logps/generated": -819.2310180664062, |
|
"eval_logps/real": -497.7419128417969, |
|
"eval_loss": 0.027269212529063225, |
|
"eval_rewards/accuracies": 0.9879629611968994, |
|
"eval_rewards/generated": -17.67508888244629, |
|
"eval_rewards/margins": 12.245142936706543, |
|
"eval_rewards/real": -5.4299445152282715, |
|
"eval_runtime": 1798.3015, |
|
"eval_samples_per_second": 2.401, |
|
"eval_steps_per_second": 0.075, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.010989010989011e-08, |
|
"logits/generated": -1.7464491128921509, |
|
"logits/real": -1.911118507385254, |
|
"logps/generated": -843.2506103515625, |
|
"logps/real": -463.8089294433594, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -18.588809967041016, |
|
"rewards/margins": 13.585103034973145, |
|
"rewards/real": -5.0037055015563965, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.91941391941392e-08, |
|
"logits/generated": -1.7304248809814453, |
|
"logits/real": -1.868173360824585, |
|
"logps/generated": -819.3607177734375, |
|
"logps/real": -445.7488708496094, |
|
"loss": 0.0219, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -17.680866241455078, |
|
"rewards/margins": 12.903231620788574, |
|
"rewards/real": -4.777635097503662, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.827838827838827e-08, |
|
"logits/generated": -1.7114464044570923, |
|
"logits/real": -1.8701032400131226, |
|
"logps/generated": -823.8060302734375, |
|
"logps/real": -483.9420471191406, |
|
"loss": 0.0183, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -17.834732055664062, |
|
"rewards/margins": 12.7833251953125, |
|
"rewards/real": -5.051407814025879, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/generated": -1.729956865310669, |
|
"eval_logits/real": -1.8793208599090576, |
|
"eval_logps/generated": -826.3790893554688, |
|
"eval_logps/real": -497.6258544921875, |
|
"eval_loss": 0.025423016399145126, |
|
"eval_rewards/accuracies": 0.9888888597488403, |
|
"eval_rewards/generated": -18.389890670776367, |
|
"eval_rewards/margins": 12.971549034118652, |
|
"eval_rewards/real": -5.418341159820557, |
|
"eval_runtime": 1801.3119, |
|
"eval_samples_per_second": 2.397, |
|
"eval_steps_per_second": 0.075, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.736263736263736e-08, |
|
"logits/generated": -1.7352432012557983, |
|
"logits/real": -1.9060261249542236, |
|
"logps/generated": -870.6500854492188, |
|
"logps/real": -461.4039611816406, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.913497924804688, |
|
"rewards/margins": 14.00297737121582, |
|
"rewards/real": -4.910521030426025, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.644688644688645e-08, |
|
"logits/generated": -1.784847617149353, |
|
"logits/real": -1.9353469610214233, |
|
"logps/generated": -850.8590087890625, |
|
"logps/real": -482.2189025878906, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -19.183507919311523, |
|
"rewards/margins": 13.948068618774414, |
|
"rewards/real": -5.235440731048584, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/generated": -1.696116328239441, |
|
"eval_logits/real": -1.8563601970672607, |
|
"eval_logps/generated": -848.2400512695312, |
|
"eval_logps/real": -504.34259033203125, |
|
"eval_loss": 0.024484921246767044, |
|
"eval_rewards/accuracies": 0.9888888597488403, |
|
"eval_rewards/generated": -20.575990676879883, |
|
"eval_rewards/margins": 14.485980033874512, |
|
"eval_rewards/real": -6.09001350402832, |
|
"eval_runtime": 1801.2175, |
|
"eval_samples_per_second": 2.397, |
|
"eval_steps_per_second": 0.075, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.553113553113552e-08, |
|
"logits/generated": -1.6885887384414673, |
|
"logits/real": -1.873110055923462, |
|
"logps/generated": -874.4225463867188, |
|
"logps/real": -485.9969177246094, |
|
"loss": 0.0225, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -21.401386260986328, |
|
"rewards/margins": 15.265310287475586, |
|
"rewards/real": -6.136077404022217, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.461538461538461e-08, |
|
"logits/generated": -1.7388379573822021, |
|
"logits/real": -1.8577735424041748, |
|
"logps/generated": -902.2374267578125, |
|
"logps/real": -491.22247314453125, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -20.343700408935547, |
|
"rewards/margins": 14.47362232208252, |
|
"rewards/real": -5.870078086853027, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.36996336996337e-08, |
|
"logits/generated": -1.760310173034668, |
|
"logits/real": -1.9213718175888062, |
|
"logps/generated": -855.2713623046875, |
|
"logps/real": -482.796875, |
|
"loss": 0.0253, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -20.608051300048828, |
|
"rewards/margins": 15.096084594726562, |
|
"rewards/real": -5.511966228485107, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_logits/generated": -1.693785309791565, |
|
"eval_logits/real": -1.8572747707366943, |
|
"eval_logps/generated": -849.6640014648438, |
|
"eval_logps/real": -502.681884765625, |
|
"eval_loss": 0.02240588143467903, |
|
"eval_rewards/accuracies": 0.989814817905426, |
|
"eval_rewards/generated": -20.7183895111084, |
|
"eval_rewards/margins": 14.79444694519043, |
|
"eval_rewards/real": -5.923939228057861, |
|
"eval_runtime": 1798.8833, |
|
"eval_samples_per_second": 2.4, |
|
"eval_steps_per_second": 0.075, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.278388278388278e-08, |
|
"logits/generated": -1.6374238729476929, |
|
"logits/real": -1.8183997869491577, |
|
"logps/generated": -923.6209106445312, |
|
"logps/real": -475.9380798339844, |
|
"loss": 0.1301, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -25.12307357788086, |
|
"rewards/margins": 18.438941955566406, |
|
"rewards/real": -6.684133052825928, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.186813186813186e-08, |
|
"logits/generated": -1.6634056568145752, |
|
"logits/real": -1.8855922222137451, |
|
"logps/generated": -906.6611328125, |
|
"logps/real": -488.73858642578125, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.581295013427734, |
|
"rewards/margins": 18.07442283630371, |
|
"rewards/real": -6.50687313079834, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_logits/generated": -1.6521793603897095, |
|
"eval_logits/real": -1.8252357244491577, |
|
"eval_logps/generated": -883.6064453125, |
|
"eval_logps/real": -513.8781127929688, |
|
"eval_loss": 0.023403111845254898, |
|
"eval_rewards/accuracies": 0.989814817905426, |
|
"eval_rewards/generated": -24.112627029418945, |
|
"eval_rewards/margins": 17.069059371948242, |
|
"eval_rewards/real": -7.043565273284912, |
|
"eval_runtime": 1801.6344, |
|
"eval_samples_per_second": 2.397, |
|
"eval_steps_per_second": 0.075, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.095238095238095e-08, |
|
"logits/generated": -1.585889458656311, |
|
"logits/real": -1.804424524307251, |
|
"logps/generated": -845.4251708984375, |
|
"logps/real": -472.7271423339844, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -23.844438552856445, |
|
"rewards/margins": 17.44953727722168, |
|
"rewards/real": -6.39490270614624, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.003663003663003e-08, |
|
"logits/generated": -1.6383155584335327, |
|
"logits/real": -1.8644497394561768, |
|
"logps/generated": -858.1883544921875, |
|
"logps/real": -480.23931884765625, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.641630172729492, |
|
"rewards/margins": 15.211410522460938, |
|
"rewards/real": -5.4302215576171875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.912087912087911e-08, |
|
"logits/generated": -1.7638896703720093, |
|
"logits/real": -1.9181245565414429, |
|
"logps/generated": -808.9601440429688, |
|
"logps/real": -457.7825622558594, |
|
"loss": 0.0141, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.34470558166504, |
|
"rewards/margins": 14.01134204864502, |
|
"rewards/real": -5.333361625671387, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/generated": -1.7082347869873047, |
|
"eval_logits/real": -1.8693056106567383, |
|
"eval_logps/generated": -852.1936645507812, |
|
"eval_logps/real": -499.138671875, |
|
"eval_loss": 0.021183772012591362, |
|
"eval_rewards/accuracies": 0.989814817905426, |
|
"eval_rewards/generated": -20.971355438232422, |
|
"eval_rewards/margins": 15.401734352111816, |
|
"eval_rewards/real": -5.569622039794922, |
|
"eval_runtime": 1777.6314, |
|
"eval_samples_per_second": 2.429, |
|
"eval_steps_per_second": 0.076, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.82051282051282e-08, |
|
"logits/generated": -1.7445008754730225, |
|
"logits/real": -1.909597396850586, |
|
"logps/generated": -885.7131958007812, |
|
"logps/real": -475.4231872558594, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.5659236907959, |
|
"rewards/margins": 16.31867218017578, |
|
"rewards/real": -5.247251033782959, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.72893772893773e-08, |
|
"logits/generated": -1.7469732761383057, |
|
"logits/real": -1.8687480688095093, |
|
"logps/generated": -842.2507934570312, |
|
"logps/real": -467.54583740234375, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -19.440731048583984, |
|
"rewards/margins": 14.80817985534668, |
|
"rewards/real": -4.632552146911621, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/generated": -1.7284820079803467, |
|
"eval_logits/real": -1.8896727561950684, |
|
"eval_logps/generated": -846.3809204101562, |
|
"eval_logps/real": -496.0889587402344, |
|
"eval_loss": 0.018172312527894974, |
|
"eval_rewards/accuracies": 0.9907407164573669, |
|
"eval_rewards/generated": -20.39007568359375, |
|
"eval_rewards/margins": 15.125428199768066, |
|
"eval_rewards/real": -5.264645099639893, |
|
"eval_runtime": 1804.3242, |
|
"eval_samples_per_second": 2.393, |
|
"eval_steps_per_second": 0.075, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.637362637362636e-08, |
|
"logits/generated": -1.7371108531951904, |
|
"logits/real": -1.9044015407562256, |
|
"logps/generated": -852.88427734375, |
|
"logps/real": -489.7041015625, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -20.25876235961914, |
|
"rewards/margins": 15.011631965637207, |
|
"rewards/real": -5.247129917144775, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.545787545787545e-08, |
|
"logits/generated": -1.7183958292007446, |
|
"logits/real": -1.8460171222686768, |
|
"logps/generated": -853.3533935546875, |
|
"logps/real": -476.1839904785156, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -21.06509017944336, |
|
"rewards/margins": 15.901025772094727, |
|
"rewards/real": -5.164063453674316, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.454212454212454e-08, |
|
"logits/generated": -1.7633212804794312, |
|
"logits/real": -1.9220634698867798, |
|
"logps/generated": -842.59765625, |
|
"logps/real": -468.2982482910156, |
|
"loss": 0.014, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -20.100887298583984, |
|
"rewards/margins": 15.023529052734375, |
|
"rewards/real": -5.077359199523926, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_logits/generated": -1.7137374877929688, |
|
"eval_logits/real": -1.8782566785812378, |
|
"eval_logps/generated": -854.0593872070312, |
|
"eval_logps/real": -498.4993591308594, |
|
"eval_loss": 0.01818298175930977, |
|
"eval_rewards/accuracies": 0.9907407164573669, |
|
"eval_rewards/generated": -21.157926559448242, |
|
"eval_rewards/margins": 15.652240753173828, |
|
"eval_rewards/real": -5.505686283111572, |
|
"eval_runtime": 1801.4399, |
|
"eval_samples_per_second": 2.397, |
|
"eval_steps_per_second": 0.075, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.362637362637363e-08, |
|
"logits/generated": -1.7656316757202148, |
|
"logits/real": -1.9041885137557983, |
|
"logps/generated": -824.7591552734375, |
|
"logps/real": -444.58935546875, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -20.36834144592285, |
|
"rewards/margins": 15.581771850585938, |
|
"rewards/real": -4.786566734313965, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.27106227106227e-08, |
|
"logits/generated": -1.6781879663467407, |
|
"logits/real": -1.8786585330963135, |
|
"logps/generated": -863.8435668945312, |
|
"logps/real": -460.38494873046875, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -22.69711685180664, |
|
"rewards/margins": 17.59657859802246, |
|
"rewards/real": -5.100537300109863, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_logits/generated": -1.7230830192565918, |
|
"eval_logits/real": -1.8856515884399414, |
|
"eval_logps/generated": -849.9996948242188, |
|
"eval_logps/real": -496.84051513671875, |
|
"eval_loss": 0.017169104889035225, |
|
"eval_rewards/accuracies": 0.9907407164573669, |
|
"eval_rewards/generated": -20.751964569091797, |
|
"eval_rewards/margins": 15.412163734436035, |
|
"eval_rewards/real": -5.33980131149292, |
|
"eval_runtime": 1779.5809, |
|
"eval_samples_per_second": 2.426, |
|
"eval_steps_per_second": 0.076, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.17948717948718e-08, |
|
"logits/generated": -1.7307789325714111, |
|
"logits/real": -1.8954929113388062, |
|
"logps/generated": -858.5565185546875, |
|
"logps/real": -445.24554443359375, |
|
"loss": 0.018, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.846343994140625, |
|
"rewards/margins": 16.011089324951172, |
|
"rewards/real": -4.835254669189453, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.087912087912088e-08, |
|
"logits/generated": -1.7022396326065063, |
|
"logits/real": -1.8817275762557983, |
|
"logps/generated": -864.0067138671875, |
|
"logps/real": -429.09539794921875, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.980777740478516, |
|
"rewards/margins": 16.663347244262695, |
|
"rewards/real": -4.317431449890137, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.996336996336996e-08, |
|
"logits/generated": -1.8108078241348267, |
|
"logits/real": -1.9502532482147217, |
|
"logps/generated": -838.1130981445312, |
|
"logps/real": -445.90008544921875, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.511409759521484, |
|
"rewards/margins": 15.326568603515625, |
|
"rewards/real": -4.184841632843018, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_logits/generated": -1.7465310096740723, |
|
"eval_logits/real": -1.9042091369628906, |
|
"eval_logps/generated": -836.2462768554688, |
|
"eval_logps/real": -490.04827880859375, |
|
"eval_loss": 0.016420260071754456, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -19.37661361694336, |
|
"eval_rewards/margins": 14.716034889221191, |
|
"eval_rewards/real": -4.660578727722168, |
|
"eval_runtime": 1791.4683, |
|
"eval_samples_per_second": 2.41, |
|
"eval_steps_per_second": 0.075, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 6.904761904761905e-08, |
|
"logits/generated": -1.7178394794464111, |
|
"logits/real": -1.9164073467254639, |
|
"logps/generated": -814.5615844726562, |
|
"logps/real": -447.05029296875, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.99026870727539, |
|
"rewards/margins": 14.712194442749023, |
|
"rewards/real": -4.278077125549316, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.813186813186813e-08, |
|
"logits/generated": -1.7678531408309937, |
|
"logits/real": -1.9162557125091553, |
|
"logps/generated": -866.0538330078125, |
|
"logps/real": -465.385986328125, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.878631591796875, |
|
"rewards/margins": 16.432056427001953, |
|
"rewards/real": -4.44657564163208, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/generated": -1.744537353515625, |
|
"eval_logits/real": -1.9063953161239624, |
|
"eval_logps/generated": -843.5385131835938, |
|
"eval_logps/real": -492.18194580078125, |
|
"eval_loss": 0.015999892726540565, |
|
"eval_rewards/accuracies": 0.9907407164573669, |
|
"eval_rewards/generated": -20.1058349609375, |
|
"eval_rewards/margins": 15.231893539428711, |
|
"eval_rewards/real": -4.873941421508789, |
|
"eval_runtime": 1802.1278, |
|
"eval_samples_per_second": 2.396, |
|
"eval_steps_per_second": 0.075, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.721611721611721e-08, |
|
"logits/generated": -1.7018417119979858, |
|
"logits/real": -1.8882300853729248, |
|
"logps/generated": -894.1951293945312, |
|
"logps/real": -452.98052978515625, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.061939239501953, |
|
"rewards/margins": 17.689193725585938, |
|
"rewards/real": -4.372746467590332, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.63003663003663e-08, |
|
"logits/generated": -1.7523149251937866, |
|
"logits/real": -1.9084774255752563, |
|
"logps/generated": -886.0391845703125, |
|
"logps/real": -463.77880859375, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -21.94992446899414, |
|
"rewards/margins": 17.210201263427734, |
|
"rewards/real": -4.739726543426514, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 6.538461538461538e-08, |
|
"logits/generated": -1.7328628301620483, |
|
"logits/real": -1.9376299381256104, |
|
"logps/generated": -842.7811279296875, |
|
"logps/real": -459.4143981933594, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.233800888061523, |
|
"rewards/margins": 16.3835391998291, |
|
"rewards/real": -4.850262641906738, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_logits/generated": -1.7434035539627075, |
|
"eval_logits/real": -1.9092177152633667, |
|
"eval_logps/generated": -852.0874633789062, |
|
"eval_logps/real": -494.6623229980469, |
|
"eval_loss": 0.015602019615471363, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -20.96072769165039, |
|
"eval_rewards/margins": 15.838738441467285, |
|
"eval_rewards/real": -5.121987342834473, |
|
"eval_runtime": 1801.3586, |
|
"eval_samples_per_second": 2.397, |
|
"eval_steps_per_second": 0.075, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.446886446886448e-08, |
|
"logits/generated": -1.7443310022354126, |
|
"logits/real": -1.906089186668396, |
|
"logps/generated": -855.3189697265625, |
|
"logps/real": -479.55206298828125, |
|
"loss": 0.017, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -21.070411682128906, |
|
"rewards/margins": 15.907896041870117, |
|
"rewards/real": -5.162516117095947, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.355311355311355e-08, |
|
"logits/generated": -1.7915077209472656, |
|
"logits/real": -1.96005117893219, |
|
"logps/generated": -909.4000854492188, |
|
"logps/real": -510.1407165527344, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -22.668399810791016, |
|
"rewards/margins": 17.420974731445312, |
|
"rewards/real": -5.247425079345703, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/generated": -1.7357203960418701, |
|
"eval_logits/real": -1.903997778892517, |
|
"eval_logps/generated": -856.4739990234375, |
|
"eval_logps/real": -494.9234924316406, |
|
"eval_loss": 0.015464075841009617, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -21.399391174316406, |
|
"eval_rewards/margins": 16.25129508972168, |
|
"eval_rewards/real": -5.148096561431885, |
|
"eval_runtime": 1799.2565, |
|
"eval_samples_per_second": 2.4, |
|
"eval_steps_per_second": 0.075, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.263736263736263e-08, |
|
"logits/generated": -1.7220814228057861, |
|
"logits/real": -1.896211862564087, |
|
"logps/generated": -864.8518676757812, |
|
"logps/real": -481.7032775878906, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.411062240600586, |
|
"rewards/margins": 16.603487014770508, |
|
"rewards/real": -4.807575702667236, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.172161172161173e-08, |
|
"logits/generated": -1.7355706691741943, |
|
"logits/real": -1.9386104345321655, |
|
"logps/generated": -856.8909301757812, |
|
"logps/real": -455.5501403808594, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.787107467651367, |
|
"rewards/margins": 16.66501808166504, |
|
"rewards/real": -5.122087001800537, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.08058608058608e-08, |
|
"logits/generated": -1.6525169610977173, |
|
"logits/real": -1.872809648513794, |
|
"logps/generated": -870.7745361328125, |
|
"logps/real": -459.68572998046875, |
|
"loss": 0.0158, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -21.89577865600586, |
|
"rewards/margins": 16.658113479614258, |
|
"rewards/real": -5.237664222717285, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_logits/generated": -1.713934302330017, |
|
"eval_logits/real": -1.8881142139434814, |
|
"eval_logps/generated": -872.0122680664062, |
|
"eval_logps/real": -499.5303955078125, |
|
"eval_loss": 0.015055526979267597, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -22.95322036743164, |
|
"eval_rewards/margins": 17.34442710876465, |
|
"eval_rewards/real": -5.608795166015625, |
|
"eval_runtime": 1796.1592, |
|
"eval_samples_per_second": 2.404, |
|
"eval_steps_per_second": 0.075, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.989010989010988e-08, |
|
"logits/generated": -1.7358171939849854, |
|
"logits/real": -1.907268762588501, |
|
"logps/generated": -841.7224731445312, |
|
"logps/real": -471.75689697265625, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.611263275146484, |
|
"rewards/margins": 16.323144912719727, |
|
"rewards/real": -5.288116455078125, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.897435897435897e-08, |
|
"logits/generated": -1.7383735179901123, |
|
"logits/real": -1.929496169090271, |
|
"logps/generated": -876.6693115234375, |
|
"logps/real": -483.697998046875, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.30849838256836, |
|
"rewards/margins": 17.91643714904785, |
|
"rewards/real": -5.392062664031982, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_logits/generated": -1.7112655639648438, |
|
"eval_logits/real": -1.8888392448425293, |
|
"eval_logps/generated": -877.6972045898438, |
|
"eval_logps/real": -500.6514587402344, |
|
"eval_loss": 0.01491004228591919, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -23.521709442138672, |
|
"eval_rewards/margins": 17.800806045532227, |
|
"eval_rewards/real": -5.7208991050720215, |
|
"eval_runtime": 1799.8979, |
|
"eval_samples_per_second": 2.399, |
|
"eval_steps_per_second": 0.075, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.805860805860806e-08, |
|
"logits/generated": -1.7410743236541748, |
|
"logits/real": -1.901346206665039, |
|
"logps/generated": -881.9417724609375, |
|
"logps/real": -473.64892578125, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -23.52071762084961, |
|
"rewards/margins": 17.951566696166992, |
|
"rewards/real": -5.569148540496826, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.714285714285714e-08, |
|
"logits/generated": -1.7443621158599854, |
|
"logits/real": -1.8728523254394531, |
|
"logps/generated": -892.2806396484375, |
|
"logps/real": -453.60552978515625, |
|
"loss": 0.015, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.209545135498047, |
|
"rewards/margins": 18.014381408691406, |
|
"rewards/real": -5.195165157318115, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.622710622710623e-08, |
|
"logits/generated": -1.7170673608779907, |
|
"logits/real": -1.889995813369751, |
|
"logps/generated": -875.4261474609375, |
|
"logps/real": -450.47576904296875, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.910694122314453, |
|
"rewards/margins": 18.570858001708984, |
|
"rewards/real": -5.339831352233887, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/generated": -1.708635926246643, |
|
"eval_logits/real": -1.8878159523010254, |
|
"eval_logps/generated": -879.9544067382812, |
|
"eval_logps/real": -500.9651184082031, |
|
"eval_loss": 0.01472516916692257, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -23.74742317199707, |
|
"eval_rewards/margins": 17.995161056518555, |
|
"eval_rewards/real": -5.752264022827148, |
|
"eval_runtime": 1800.102, |
|
"eval_samples_per_second": 2.399, |
|
"eval_steps_per_second": 0.075, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.531135531135531e-08, |
|
"logits/generated": -1.6815847158432007, |
|
"logits/real": -1.917245626449585, |
|
"logps/generated": -922.5784912109375, |
|
"logps/real": -475.2225646972656, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.03234100341797, |
|
"rewards/margins": 19.452983856201172, |
|
"rewards/real": -5.579358100891113, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5.439560439560439e-08, |
|
"logits/generated": -1.7002710103988647, |
|
"logits/real": -1.886687994003296, |
|
"logps/generated": -922.806640625, |
|
"logps/real": -481.56787109375, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.185983657836914, |
|
"rewards/margins": 19.83902359008789, |
|
"rewards/real": -5.346956729888916, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/generated": -1.6730928421020508, |
|
"eval_logits/real": -1.8584686517715454, |
|
"eval_logps/generated": -891.3632202148438, |
|
"eval_logps/real": -505.2818298339844, |
|
"eval_loss": 0.015368033200502396, |
|
"eval_rewards/accuracies": 0.9907407164573669, |
|
"eval_rewards/generated": -24.888301849365234, |
|
"eval_rewards/margins": 18.704362869262695, |
|
"eval_rewards/real": -6.183938026428223, |
|
"eval_runtime": 1782.9432, |
|
"eval_samples_per_second": 2.422, |
|
"eval_steps_per_second": 0.076, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5.347985347985348e-08, |
|
"logits/generated": -1.6648222208023071, |
|
"logits/real": -1.8549429178237915, |
|
"logps/generated": -912.20654296875, |
|
"logps/real": -520.7745361328125, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.70351791381836, |
|
"rewards/margins": 18.5965518951416, |
|
"rewards/real": -6.106965065002441, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5.256410256410256e-08, |
|
"logits/generated": -1.704904556274414, |
|
"logits/real": -1.8857839107513428, |
|
"logps/generated": -927.8040771484375, |
|
"logps/real": -481.84820556640625, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -26.38739585876465, |
|
"rewards/margins": 20.759992599487305, |
|
"rewards/real": -5.627402305603027, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.164835164835165e-08, |
|
"logits/generated": -1.7228724956512451, |
|
"logits/real": -1.8705856800079346, |
|
"logps/generated": -894.1580810546875, |
|
"logps/real": -462.95697021484375, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -24.506175994873047, |
|
"rewards/margins": 18.540332794189453, |
|
"rewards/real": -5.96584415435791, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_logits/generated": -1.6592012643814087, |
|
"eval_logits/real": -1.848427176475525, |
|
"eval_logps/generated": -901.4036865234375, |
|
"eval_logps/real": -508.3891906738281, |
|
"eval_loss": 0.015495581552386284, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -25.892351150512695, |
|
"eval_rewards/margins": 19.397686004638672, |
|
"eval_rewards/real": -6.494665145874023, |
|
"eval_runtime": 1785.6862, |
|
"eval_samples_per_second": 2.418, |
|
"eval_steps_per_second": 0.076, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5.073260073260073e-08, |
|
"logits/generated": -1.6706949472427368, |
|
"logits/real": -1.8376855850219727, |
|
"logps/generated": -927.1456298828125, |
|
"logps/real": -447.51123046875, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -27.89125633239746, |
|
"rewards/margins": 21.775976181030273, |
|
"rewards/real": -6.1152777671813965, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.981684981684982e-08, |
|
"logits/generated": -1.6462090015411377, |
|
"logits/real": -1.8371574878692627, |
|
"logps/generated": -882.9945068359375, |
|
"logps/real": -455.0235290527344, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -25.57509994506836, |
|
"rewards/margins": 19.432373046875, |
|
"rewards/real": -6.142725944519043, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_logits/generated": -1.640711784362793, |
|
"eval_logits/real": -1.8339245319366455, |
|
"eval_logps/generated": -911.697021484375, |
|
"eval_logps/real": -511.9859313964844, |
|
"eval_loss": 0.01578509621322155, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -26.921693801879883, |
|
"eval_rewards/margins": 20.067354202270508, |
|
"eval_rewards/real": -6.854339599609375, |
|
"eval_runtime": 1782.1365, |
|
"eval_samples_per_second": 2.423, |
|
"eval_steps_per_second": 0.076, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.8901098901098895e-08, |
|
"logits/generated": -1.6573280096054077, |
|
"logits/real": -1.885148286819458, |
|
"logps/generated": -923.4420776367188, |
|
"logps/real": -483.80621337890625, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -28.48358154296875, |
|
"rewards/margins": 21.583927154541016, |
|
"rewards/real": -6.899654388427734, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.7985347985347985e-08, |
|
"logits/generated": -1.6559168100357056, |
|
"logits/real": -1.8097467422485352, |
|
"logps/generated": -922.6828002929688, |
|
"logps/real": -488.87664794921875, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.857372283935547, |
|
"rewards/margins": 20.4537353515625, |
|
"rewards/real": -6.403636932373047, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.706959706959707e-08, |
|
"logits/generated": -1.6271326541900635, |
|
"logits/real": -1.840662956237793, |
|
"logps/generated": -901.0564575195312, |
|
"logps/real": -491.31146240234375, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.303613662719727, |
|
"rewards/margins": 20.737079620361328, |
|
"rewards/real": -6.566534996032715, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_logits/generated": -1.6268597841262817, |
|
"eval_logits/real": -1.8235687017440796, |
|
"eval_logps/generated": -920.2236328125, |
|
"eval_logps/real": -514.767822265625, |
|
"eval_loss": 0.015848280861973763, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -27.77434730529785, |
|
"eval_rewards/margins": 20.641807556152344, |
|
"eval_rewards/real": -7.132537841796875, |
|
"eval_runtime": 1781.0614, |
|
"eval_samples_per_second": 2.424, |
|
"eval_steps_per_second": 0.076, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.615384615384615e-08, |
|
"logits/generated": -1.6111915111541748, |
|
"logits/real": -1.8123859167099, |
|
"logps/generated": -929.34326171875, |
|
"logps/real": -504.7548828125, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.69466781616211, |
|
"rewards/margins": 20.481304168701172, |
|
"rewards/real": -7.213364601135254, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.5238095238095236e-08, |
|
"logits/generated": -1.6448142528533936, |
|
"logits/real": -1.8344615697860718, |
|
"logps/generated": -938.2579345703125, |
|
"logps/real": -477.796630859375, |
|
"loss": 0.0168, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.318180084228516, |
|
"rewards/margins": 21.399818420410156, |
|
"rewards/real": -6.918364524841309, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_logits/generated": -1.656567931175232, |
|
"eval_logits/real": -1.8447872400283813, |
|
"eval_logps/generated": -905.2711181640625, |
|
"eval_logps/real": -512.4611206054688, |
|
"eval_loss": 0.015721740201115608, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -26.27910041809082, |
|
"eval_rewards/margins": 19.37723731994629, |
|
"eval_rewards/real": -6.901863098144531, |
|
"eval_runtime": 1781.2515, |
|
"eval_samples_per_second": 2.424, |
|
"eval_steps_per_second": 0.076, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.432234432234432e-08, |
|
"logits/generated": -1.6806806325912476, |
|
"logits/real": -1.909649133682251, |
|
"logps/generated": -898.3150634765625, |
|
"logps/real": -469.5130920410156, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.730077743530273, |
|
"rewards/margins": 19.823253631591797, |
|
"rewards/real": -5.906826019287109, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.34065934065934e-08, |
|
"logits/generated": -1.734819769859314, |
|
"logits/real": -1.9214661121368408, |
|
"logps/generated": -965.8170776367188, |
|
"logps/real": -494.10760498046875, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.31577491760254, |
|
"rewards/margins": 20.62114906311035, |
|
"rewards/real": -6.6946234703063965, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.2490842490842486e-08, |
|
"logits/generated": -1.7267796993255615, |
|
"logits/real": -1.929030179977417, |
|
"logps/generated": -886.3689575195312, |
|
"logps/real": -481.1717834472656, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.81966781616211, |
|
"rewards/margins": 18.82332992553711, |
|
"rewards/real": -5.996334552764893, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_logits/generated": -1.6533170938491821, |
|
"eval_logits/real": -1.8422995805740356, |
|
"eval_logps/generated": -907.6251220703125, |
|
"eval_logps/real": -513.0281372070312, |
|
"eval_loss": 0.016253722831606865, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -26.514497756958008, |
|
"eval_rewards/margins": 19.55593490600586, |
|
"eval_rewards/real": -6.958561897277832, |
|
"eval_runtime": 1784.393, |
|
"eval_samples_per_second": 2.42, |
|
"eval_steps_per_second": 0.076, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.1575091575091576e-08, |
|
"logits/generated": -1.6823298931121826, |
|
"logits/real": -1.8801406621932983, |
|
"logps/generated": -974.8401489257812, |
|
"logps/real": -505.0538635253906, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.211589813232422, |
|
"rewards/margins": 21.735652923583984, |
|
"rewards/real": -6.4759368896484375, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.065934065934066e-08, |
|
"logits/generated": -1.6613868474960327, |
|
"logits/real": -1.8679672479629517, |
|
"logps/generated": -923.1788330078125, |
|
"logps/real": -530.2198486328125, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -26.568191528320312, |
|
"rewards/margins": 19.437541961669922, |
|
"rewards/real": -7.130646705627441, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_logits/generated": -1.6368576288223267, |
|
"eval_logits/real": -1.8327449560165405, |
|
"eval_logps/generated": -924.7037963867188, |
|
"eval_logps/real": -518.8289184570312, |
|
"eval_loss": 0.0164579376578331, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -28.222370147705078, |
|
"eval_rewards/margins": 20.6837215423584, |
|
"eval_rewards/real": -7.538645267486572, |
|
"eval_runtime": 1783.8825, |
|
"eval_samples_per_second": 2.421, |
|
"eval_steps_per_second": 0.076, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.9743589743589737e-08, |
|
"logits/generated": -1.635840654373169, |
|
"logits/real": -1.838230848312378, |
|
"logps/generated": -982.92529296875, |
|
"logps/real": -524.0345458984375, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -28.60614585876465, |
|
"rewards/margins": 21.019775390625, |
|
"rewards/real": -7.586370944976807, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.8827838827838827e-08, |
|
"logits/generated": -1.558452844619751, |
|
"logits/real": -1.8335201740264893, |
|
"logps/generated": -916.1337890625, |
|
"logps/real": -474.8262634277344, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -27.614761352539062, |
|
"rewards/margins": 20.65240478515625, |
|
"rewards/real": -6.9623517990112305, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.791208791208791e-08, |
|
"logits/generated": -1.59113347530365, |
|
"logits/real": -1.853981614112854, |
|
"logps/generated": -958.3170166015625, |
|
"logps/real": -483.369140625, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.300689697265625, |
|
"rewards/margins": 23.347179412841797, |
|
"rewards/real": -6.9535112380981445, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_logits/generated": -1.6365333795547485, |
|
"eval_logits/real": -1.8344322443008423, |
|
"eval_logps/generated": -928.9208374023438, |
|
"eval_logps/real": -520.0109252929688, |
|
"eval_loss": 0.016453638672828674, |
|
"eval_rewards/accuracies": 0.9907407164573669, |
|
"eval_rewards/generated": -28.644060134887695, |
|
"eval_rewards/margins": 20.987220764160156, |
|
"eval_rewards/real": -7.6568403244018555, |
|
"eval_runtime": 1804.1661, |
|
"eval_samples_per_second": 2.393, |
|
"eval_steps_per_second": 0.075, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.6996336996336994e-08, |
|
"logits/generated": -1.6491447687149048, |
|
"logits/real": -1.8126541376113892, |
|
"logps/generated": -940.8536376953125, |
|
"logps/real": -472.17559814453125, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.954341888427734, |
|
"rewards/margins": 23.000102996826172, |
|
"rewards/real": -6.954239845275879, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.608058608058608e-08, |
|
"logits/generated": -1.6780191659927368, |
|
"logits/real": -1.888399362564087, |
|
"logps/generated": -910.19189453125, |
|
"logps/real": -508.4088439941406, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.487747192382812, |
|
"rewards/margins": 20.76497459411621, |
|
"rewards/real": -7.722770690917969, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_logits/generated": -1.6348390579223633, |
|
"eval_logits/real": -1.835233449935913, |
|
"eval_logps/generated": -934.5078125, |
|
"eval_logps/real": -521.4318237304688, |
|
"eval_loss": 0.016549235209822655, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -29.202777862548828, |
|
"eval_rewards/margins": 21.403844833374023, |
|
"eval_rewards/real": -7.7989301681518555, |
|
"eval_runtime": 1798.7077, |
|
"eval_samples_per_second": 2.401, |
|
"eval_steps_per_second": 0.075, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.516483516483517e-08, |
|
"logits/generated": -1.6131916046142578, |
|
"logits/real": -1.8359510898590088, |
|
"logps/generated": -923.1203002929688, |
|
"logps/real": -487.068359375, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -29.090587615966797, |
|
"rewards/margins": 21.882305145263672, |
|
"rewards/real": -7.208279609680176, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.424908424908425e-08, |
|
"logits/generated": -1.6657575368881226, |
|
"logits/real": -1.8384662866592407, |
|
"logps/generated": -914.0133056640625, |
|
"logps/real": -461.9369201660156, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -27.760547637939453, |
|
"rewards/margins": 21.031766891479492, |
|
"rewards/real": -6.7287774085998535, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.333333333333333e-08, |
|
"logits/generated": -1.6930701732635498, |
|
"logits/real": -1.907292366027832, |
|
"logps/generated": -965.4385986328125, |
|
"logps/real": -523.5211181640625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.622669219970703, |
|
"rewards/margins": 22.505613327026367, |
|
"rewards/real": -8.117053031921387, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_logits/generated": -1.6166415214538574, |
|
"eval_logits/real": -1.8168882131576538, |
|
"eval_logps/generated": -938.438232421875, |
|
"eval_logps/real": -522.4202880859375, |
|
"eval_loss": 0.016505062580108643, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -29.59580421447754, |
|
"eval_rewards/margins": 21.69802474975586, |
|
"eval_rewards/real": -7.897781848907471, |
|
"eval_runtime": 1800.0221, |
|
"eval_samples_per_second": 2.399, |
|
"eval_steps_per_second": 0.075, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.241758241758242e-08, |
|
"logits/generated": -1.605548620223999, |
|
"logits/real": -1.788865089416504, |
|
"logps/generated": -1042.3509521484375, |
|
"logps/real": -527.3464965820312, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.53047561645508, |
|
"rewards/margins": 24.470928192138672, |
|
"rewards/real": -8.059545516967773, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.15018315018315e-08, |
|
"logits/generated": -1.6185451745986938, |
|
"logits/real": -1.8139030933380127, |
|
"logps/generated": -965.8375854492188, |
|
"logps/real": -508.0126953125, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.107463836669922, |
|
"rewards/margins": 22.463966369628906, |
|
"rewards/real": -7.643497467041016, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_logits/generated": -1.61648428440094, |
|
"eval_logits/real": -1.81755793094635, |
|
"eval_logps/generated": -940.4099731445312, |
|
"eval_logps/real": -523.1380004882812, |
|
"eval_loss": 0.016207309439778328, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -29.792985916137695, |
|
"eval_rewards/margins": 21.823434829711914, |
|
"eval_rewards/real": -7.969552993774414, |
|
"eval_runtime": 1801.8606, |
|
"eval_samples_per_second": 2.396, |
|
"eval_steps_per_second": 0.075, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.0586080586080584e-08, |
|
"logits/generated": -1.56507408618927, |
|
"logits/real": -1.8616406917572021, |
|
"logps/generated": -976.12548828125, |
|
"logps/real": -496.3408203125, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.310409545898438, |
|
"rewards/margins": 23.59577178955078, |
|
"rewards/real": -7.714636325836182, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.9670329670329668e-08, |
|
"logits/generated": -1.5671743154525757, |
|
"logits/real": -1.733432412147522, |
|
"logps/generated": -920.916015625, |
|
"logps/real": -522.0253295898438, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.688098907470703, |
|
"rewards/margins": 20.804473876953125, |
|
"rewards/real": -7.883625030517578, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.875457875457875e-08, |
|
"logits/generated": -1.6324392557144165, |
|
"logits/real": -1.8402057886123657, |
|
"logps/generated": -949.7025146484375, |
|
"logps/real": -491.1188049316406, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.04391098022461, |
|
"rewards/margins": 23.250308990478516, |
|
"rewards/real": -7.793595790863037, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_logits/generated": -1.6044836044311523, |
|
"eval_logits/real": -1.809339165687561, |
|
"eval_logps/generated": -949.38916015625, |
|
"eval_logps/real": -525.528564453125, |
|
"eval_loss": 0.01638590730726719, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -30.69091033935547, |
|
"eval_rewards/margins": 22.482301712036133, |
|
"eval_rewards/real": -8.208609580993652, |
|
"eval_runtime": 1798.0882, |
|
"eval_samples_per_second": 2.401, |
|
"eval_steps_per_second": 0.075, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7838827838827838e-08, |
|
"logits/generated": -1.596328854560852, |
|
"logits/real": -1.8236808776855469, |
|
"logps/generated": -962.3810424804688, |
|
"logps/real": -511.0006408691406, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -31.28286361694336, |
|
"rewards/margins": 23.244314193725586, |
|
"rewards/real": -8.038549423217773, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.692307692307692e-08, |
|
"logits/generated": -1.608758568763733, |
|
"logits/real": -1.8891884088516235, |
|
"logps/generated": -1000.3331298828125, |
|
"logps/real": -517.9403076171875, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.935813903808594, |
|
"rewards/margins": 25.521175384521484, |
|
"rewards/real": -7.414637565612793, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_logits/generated": -1.591917634010315, |
|
"eval_logits/real": -1.7978274822235107, |
|
"eval_logps/generated": -949.2075805664062, |
|
"eval_logps/real": -524.6597290039062, |
|
"eval_loss": 0.016565019264817238, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -30.672739028930664, |
|
"eval_rewards/margins": 22.55101776123047, |
|
"eval_rewards/real": -8.121725082397461, |
|
"eval_runtime": 1802.0893, |
|
"eval_samples_per_second": 2.396, |
|
"eval_steps_per_second": 0.075, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.600732600732601e-08, |
|
"logits/generated": -1.537630319595337, |
|
"logits/real": -1.7378448247909546, |
|
"logps/generated": -968.7054443359375, |
|
"logps/real": -489.5370178222656, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.376976013183594, |
|
"rewards/margins": 24.455623626708984, |
|
"rewards/real": -7.921347141265869, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.509157509157509e-08, |
|
"logits/generated": -1.604174017906189, |
|
"logits/real": -1.8114948272705078, |
|
"logps/generated": -937.0480346679688, |
|
"logps/real": -495.5521545410156, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.041767120361328, |
|
"rewards/margins": 22.23421859741211, |
|
"rewards/real": -7.807549953460693, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4175824175824175e-08, |
|
"logits/generated": -1.5735671520233154, |
|
"logits/real": -1.7788879871368408, |
|
"logps/generated": -938.4112548828125, |
|
"logps/real": -486.3994140625, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -29.847030639648438, |
|
"rewards/margins": 22.341915130615234, |
|
"rewards/real": -7.505116939544678, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_logits/generated": -1.5908763408660889, |
|
"eval_logits/real": -1.7955536842346191, |
|
"eval_logps/generated": -943.6237182617188, |
|
"eval_logps/real": -521.6991577148438, |
|
"eval_loss": 0.016153085976839066, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -30.114360809326172, |
|
"eval_rewards/margins": 22.2886962890625, |
|
"eval_rewards/real": -7.825665473937988, |
|
"eval_runtime": 1801.6388, |
|
"eval_samples_per_second": 2.397, |
|
"eval_steps_per_second": 0.075, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.326007326007326e-08, |
|
"logits/generated": -1.5542490482330322, |
|
"logits/real": -1.7995363473892212, |
|
"logps/generated": -1011.2404174804688, |
|
"logps/real": -517.3983764648438, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -32.60115432739258, |
|
"rewards/margins": 24.621551513671875, |
|
"rewards/real": -7.979601860046387, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.2344322344322346e-08, |
|
"logits/generated": -1.5683870315551758, |
|
"logits/real": -1.7601861953735352, |
|
"logps/generated": -916.2017822265625, |
|
"logps/real": -493.31494140625, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.836261749267578, |
|
"rewards/margins": 22.397926330566406, |
|
"rewards/real": -7.438332557678223, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/generated": -1.5854144096374512, |
|
"eval_logits/real": -1.7919222116470337, |
|
"eval_logps/generated": -949.1341552734375, |
|
"eval_logps/real": -523.7774658203125, |
|
"eval_loss": 0.0166173093020916, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -30.665393829345703, |
|
"eval_rewards/margins": 22.631893157958984, |
|
"eval_rewards/real": -8.033498764038086, |
|
"eval_runtime": 1798.076, |
|
"eval_samples_per_second": 2.401, |
|
"eval_steps_per_second": 0.075, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.1428571428571426e-08, |
|
"logits/generated": -1.6264305114746094, |
|
"logits/real": -1.851205825805664, |
|
"logps/generated": -929.2615356445312, |
|
"logps/real": -481.2755432128906, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.502222061157227, |
|
"rewards/margins": 23.29401206970215, |
|
"rewards/real": -7.2082085609436035, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.0512820512820512e-08, |
|
"logits/generated": -1.6349788904190063, |
|
"logits/real": -1.8103811740875244, |
|
"logps/generated": -930.9240112304688, |
|
"logps/real": -486.3633728027344, |
|
"loss": 0.002, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -28.78774642944336, |
|
"rewards/margins": 21.5825138092041, |
|
"rewards/real": -7.205234527587891, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.9597069597069596e-08, |
|
"logits/generated": -1.5818378925323486, |
|
"logits/real": -1.8051410913467407, |
|
"logps/generated": -994.4307861328125, |
|
"logps/real": -506.43048095703125, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -32.12641143798828, |
|
"rewards/margins": 24.237791061401367, |
|
"rewards/real": -7.888618469238281, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_logits/generated": -1.5768269300460815, |
|
"eval_logits/real": -1.7851576805114746, |
|
"eval_logps/generated": -952.6190795898438, |
|
"eval_logps/real": -525.199951171875, |
|
"eval_loss": 0.016495853662490845, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -31.013896942138672, |
|
"eval_rewards/margins": 22.838150024414062, |
|
"eval_rewards/real": -8.175748825073242, |
|
"eval_runtime": 1801.7659, |
|
"eval_samples_per_second": 2.397, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.868131868131868e-08, |
|
"logits/generated": -1.5746369361877441, |
|
"logits/real": -1.8138281106948853, |
|
"logps/generated": -935.8997192382812, |
|
"logps/real": -524.5531616210938, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.7835750579834, |
|
"rewards/margins": 22.015628814697266, |
|
"rewards/real": -7.767943382263184, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.7765567765567766e-08, |
|
"logits/generated": -1.617248296737671, |
|
"logits/real": -1.8158845901489258, |
|
"logps/generated": -975.7345581054688, |
|
"logps/real": -520.7113037109375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.075618743896484, |
|
"rewards/margins": 23.180437088012695, |
|
"rewards/real": -7.895182132720947, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_logits/generated": -1.5756635665893555, |
|
"eval_logits/real": -1.7830266952514648, |
|
"eval_logps/generated": -950.0453491210938, |
|
"eval_logps/real": -523.9951171875, |
|
"eval_loss": 0.01654692552983761, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -30.75650978088379, |
|
"eval_rewards/margins": 22.701244354248047, |
|
"eval_rewards/real": -8.055268287658691, |
|
"eval_runtime": 1788.1081, |
|
"eval_samples_per_second": 2.415, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.684981684981685e-08, |
|
"logits/generated": -1.591524362564087, |
|
"logits/real": -1.7995145320892334, |
|
"logps/generated": -919.4166259765625, |
|
"logps/real": -488.1844787597656, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -29.463176727294922, |
|
"rewards/margins": 22.12551498413086, |
|
"rewards/real": -7.337666988372803, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.5934065934065933e-08, |
|
"logits/generated": -1.5764684677124023, |
|
"logits/real": -1.7932662963867188, |
|
"logps/generated": -967.7091064453125, |
|
"logps/real": -521.2521362304688, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.743621826171875, |
|
"rewards/margins": 24.00382423400879, |
|
"rewards/real": -7.739800453186035, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.5018315018315017e-08, |
|
"logits/generated": -1.5970559120178223, |
|
"logits/real": -1.8183799982070923, |
|
"logps/generated": -954.7509765625, |
|
"logps/real": -505.82757568359375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.225833892822266, |
|
"rewards/margins": 23.92045021057129, |
|
"rewards/real": -7.305386543273926, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_logits/generated": -1.5691884756088257, |
|
"eval_logits/real": -1.7789667844772339, |
|
"eval_logps/generated": -955.8453369140625, |
|
"eval_logps/real": -525.2799682617188, |
|
"eval_loss": 0.01644195057451725, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -31.336515426635742, |
|
"eval_rewards/margins": 23.152767181396484, |
|
"eval_rewards/real": -8.18375015258789, |
|
"eval_runtime": 1807.2715, |
|
"eval_samples_per_second": 2.389, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.4102564102564102e-08, |
|
"logits/generated": -1.5513131618499756, |
|
"logits/real": -1.7797822952270508, |
|
"logps/generated": -938.0900268554688, |
|
"logps/real": -503.44921875, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -31.36539649963379, |
|
"rewards/margins": 23.386436462402344, |
|
"rewards/real": -7.978959083557129, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.3186813186813187e-08, |
|
"logits/generated": -1.6085302829742432, |
|
"logits/real": -1.7643792629241943, |
|
"logps/generated": -978.6404418945312, |
|
"logps/real": -512.4838256835938, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -31.299551010131836, |
|
"rewards/margins": 23.147233963012695, |
|
"rewards/real": -8.152318000793457, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_logits/generated": -1.57485032081604, |
|
"eval_logits/real": -1.7849942445755005, |
|
"eval_logps/generated": -956.59814453125, |
|
"eval_logps/real": -525.350830078125, |
|
"eval_loss": 0.01633109152317047, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -31.411802291870117, |
|
"eval_rewards/margins": 23.220966339111328, |
|
"eval_rewards/real": -8.190834999084473, |
|
"eval_runtime": 1798.0611, |
|
"eval_samples_per_second": 2.401, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.227106227106227e-08, |
|
"logits/generated": -1.548825979232788, |
|
"logits/real": -1.7612594366073608, |
|
"logps/generated": -1017.5808715820312, |
|
"logps/real": -520.19384765625, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.57468795776367, |
|
"rewards/margins": 25.4267520904541, |
|
"rewards/real": -8.147936820983887, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.1355311355311355e-08, |
|
"logits/generated": -1.556921362876892, |
|
"logits/real": -1.7643944025039673, |
|
"logps/generated": -938.5661010742188, |
|
"logps/real": -503.1206970214844, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -29.682641983032227, |
|
"rewards/margins": 21.84661102294922, |
|
"rewards/real": -7.83603048324585, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.0439560439560439e-08, |
|
"logits/generated": -1.5458358526229858, |
|
"logits/real": -1.7758142948150635, |
|
"logps/generated": -942.8790893554688, |
|
"logps/real": -487.7559509277344, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -31.93533706665039, |
|
"rewards/margins": 24.24590492248535, |
|
"rewards/real": -7.689431667327881, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_logits/generated": -1.5624111890792847, |
|
"eval_logits/real": -1.7751930952072144, |
|
"eval_logps/generated": -963.309814453125, |
|
"eval_logps/real": -527.5802001953125, |
|
"eval_loss": 0.016566824167966843, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -32.08296585083008, |
|
"eval_rewards/margins": 23.669187545776367, |
|
"eval_rewards/real": -8.413775444030762, |
|
"eval_runtime": 1800.7952, |
|
"eval_samples_per_second": 2.398, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.523809523809522e-09, |
|
"logits/generated": -1.5966811180114746, |
|
"logits/real": -1.791329026222229, |
|
"logps/generated": -976.7927856445312, |
|
"logps/real": -493.8138122558594, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -32.71385955810547, |
|
"rewards/margins": 24.73776626586914, |
|
"rewards/real": -7.9760942459106445, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 8.608058608058607e-09, |
|
"logits/generated": -1.5438224077224731, |
|
"logits/real": -1.7942355871200562, |
|
"logps/generated": -1013.4166870117188, |
|
"logps/real": -529.5133056640625, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.10750961303711, |
|
"rewards/margins": 25.782058715820312, |
|
"rewards/real": -8.325451850891113, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_logits/generated": -1.5631078481674194, |
|
"eval_logits/real": -1.775943398475647, |
|
"eval_logps/generated": -964.0065307617188, |
|
"eval_logps/real": -527.6651611328125, |
|
"eval_loss": 0.016596974804997444, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -32.15264129638672, |
|
"eval_rewards/margins": 23.730371475219727, |
|
"eval_rewards/real": -8.422268867492676, |
|
"eval_runtime": 1805.5605, |
|
"eval_samples_per_second": 2.392, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.692307692307693e-09, |
|
"logits/generated": -1.6013424396514893, |
|
"logits/real": -1.8322757482528687, |
|
"logps/generated": -1004.1285400390625, |
|
"logps/real": -520.826416015625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.476234436035156, |
|
"rewards/margins": 25.89908218383789, |
|
"rewards/real": -8.577150344848633, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.776556776556776e-09, |
|
"logits/generated": -1.6247609853744507, |
|
"logits/real": -1.8308923244476318, |
|
"logps/generated": -974.4166870117188, |
|
"logps/real": -467.469970703125, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -33.500511169433594, |
|
"rewards/margins": 25.762847900390625, |
|
"rewards/real": -7.737664699554443, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5.86080586080586e-09, |
|
"logits/generated": -1.5782761573791504, |
|
"logits/real": -1.8114595413208008, |
|
"logps/generated": -984.72265625, |
|
"logps/real": -521.5853881835938, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -32.02996826171875, |
|
"rewards/margins": 24.080408096313477, |
|
"rewards/real": -7.949559211730957, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_logits/generated": -1.573925256729126, |
|
"eval_logits/real": -1.7831730842590332, |
|
"eval_logps/generated": -955.689453125, |
|
"eval_logps/real": -525.0056762695312, |
|
"eval_loss": 0.016293587163090706, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -31.32093048095703, |
|
"eval_rewards/margins": 23.164613723754883, |
|
"eval_rewards/real": -8.156318664550781, |
|
"eval_runtime": 1805.9186, |
|
"eval_samples_per_second": 2.391, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 4.945054945054945e-09, |
|
"logits/generated": -1.5925065279006958, |
|
"logits/real": -1.8153518438339233, |
|
"logps/generated": -977.4358520507812, |
|
"logps/real": -493.5396423339844, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.501895904541016, |
|
"rewards/margins": 24.7786865234375, |
|
"rewards/real": -7.723211765289307, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 4.02930402930403e-09, |
|
"logits/generated": -1.5815564393997192, |
|
"logits/real": -1.7986618280410767, |
|
"logps/generated": -984.4176025390625, |
|
"logps/real": -496.143310546875, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.3635311126709, |
|
"rewards/margins": 22.687597274780273, |
|
"rewards/real": -7.675933837890625, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_logits/generated": -1.5708197355270386, |
|
"eval_logits/real": -1.7807316780090332, |
|
"eval_logps/generated": -957.48876953125, |
|
"eval_logps/real": -525.5498046875, |
|
"eval_loss": 0.016291461884975433, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -31.50086784362793, |
|
"eval_rewards/margins": 23.290132522583008, |
|
"eval_rewards/real": -8.210736274719238, |
|
"eval_runtime": 1806.1638, |
|
"eval_samples_per_second": 2.391, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.1135531135531137e-09, |
|
"logits/generated": -1.6078064441680908, |
|
"logits/real": -1.8417888879776, |
|
"logps/generated": -980.16552734375, |
|
"logps/real": -481.13250732421875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.39889144897461, |
|
"rewards/margins": 25.546215057373047, |
|
"rewards/real": -7.852681636810303, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.197802197802198e-09, |
|
"logits/generated": -1.5867105722427368, |
|
"logits/real": -1.8165124654769897, |
|
"logps/generated": -991.1232299804688, |
|
"logps/real": -548.1749267578125, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -30.872753143310547, |
|
"rewards/margins": 22.4478759765625, |
|
"rewards/real": -8.424878120422363, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.282051282051282e-09, |
|
"logits/generated": -1.5666377544403076, |
|
"logits/real": -1.8013808727264404, |
|
"logps/generated": -938.0455932617188, |
|
"logps/real": -504.17022705078125, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -30.902517318725586, |
|
"rewards/margins": 23.061681747436523, |
|
"rewards/real": -7.840832710266113, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_logits/generated": -1.5719400644302368, |
|
"eval_logits/real": -1.7812800407409668, |
|
"eval_logps/generated": -956.3063354492188, |
|
"eval_logps/real": -525.1734619140625, |
|
"eval_loss": 0.01621842570602894, |
|
"eval_rewards/accuracies": 0.9916666746139526, |
|
"eval_rewards/generated": -31.38262367248535, |
|
"eval_rewards/margins": 23.20952606201172, |
|
"eval_rewards/real": -8.173093795776367, |
|
"eval_runtime": 1803.2665, |
|
"eval_samples_per_second": 2.395, |
|
"eval_steps_per_second": 0.075, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.6630036630036627e-10, |
|
"logits/generated": -1.5853986740112305, |
|
"logits/real": -1.8437074422836304, |
|
"logps/generated": -930.419921875, |
|
"logps/real": -524.40087890625, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -30.547359466552734, |
|
"rewards/margins": 22.564682006835938, |
|
"rewards/real": -7.982677459716797, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1214, |
|
"total_flos": 0.0, |
|
"train_loss": 0.03410133493748145, |
|
"train_runtime": 146707.6169, |
|
"train_samples_per_second": 0.53, |
|
"train_steps_per_second": 0.008 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1214, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|