|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 4668, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0706638115631692e-09, |
|
"logits/generated": -3.0364484786987305, |
|
"logits/real": -3.0630810260772705, |
|
"logps/generated": -251.72409057617188, |
|
"logps/real": -237.75723266601562, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.070663811563169e-08, |
|
"logits/generated": -2.9856934547424316, |
|
"logits/real": -2.989187240600586, |
|
"logps/generated": -390.6001892089844, |
|
"logps/real": -373.385498046875, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/generated": -0.004033928737044334, |
|
"rewards/margins": 0.010273342952132225, |
|
"rewards/real": 0.006239414215087891, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.141327623126338e-08, |
|
"logits/generated": -3.0185017585754395, |
|
"logits/real": -2.9879310131073, |
|
"logps/generated": -393.1386413574219, |
|
"logps/real": -348.47198486328125, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/generated": 0.009772378951311111, |
|
"rewards/margins": 0.008250057697296143, |
|
"rewards/real": 0.018022436648607254, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.2119914346895076e-08, |
|
"logits/generated": -3.0177969932556152, |
|
"logits/real": -3.0238332748413086, |
|
"logps/generated": -361.01361083984375, |
|
"logps/real": -317.5545654296875, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/generated": 0.04016115143895149, |
|
"rewards/margins": 0.07257900387048721, |
|
"rewards/real": 0.112740159034729, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.282655246252676e-08, |
|
"logits/generated": -3.012861967086792, |
|
"logits/real": -3.010136604309082, |
|
"logps/generated": -404.3400573730469, |
|
"logps/real": -321.47833251953125, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": 0.1161263957619667, |
|
"rewards/margins": 0.16384394466876984, |
|
"rewards/real": 0.27997034788131714, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.353319057815846e-08, |
|
"logits/generated": -3.006298065185547, |
|
"logits/real": -2.9836604595184326, |
|
"logps/generated": -386.39251708984375, |
|
"logps/real": -344.08502197265625, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": 0.17066331207752228, |
|
"rewards/margins": 0.3133729100227356, |
|
"rewards/real": 0.48403626680374146, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.423982869379015e-08, |
|
"logits/generated": -2.9997777938842773, |
|
"logits/real": -2.991501808166504, |
|
"logps/generated": -408.845703125, |
|
"logps/real": -346.44403076171875, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": 0.11928486824035645, |
|
"rewards/margins": 0.6128198504447937, |
|
"rewards/real": 0.7321046590805054, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.494646680942184e-08, |
|
"logits/generated": -2.9945003986358643, |
|
"logits/real": -2.9864401817321777, |
|
"logps/generated": -417.6007385253906, |
|
"logps/real": -394.57769775390625, |
|
"loss": 0.412, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -0.056093405932188034, |
|
"rewards/margins": 1.043709635734558, |
|
"rewards/real": 0.9876161813735962, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.565310492505352e-08, |
|
"logits/generated": -2.996636390686035, |
|
"logits/real": -2.982595682144165, |
|
"logps/generated": -379.52105712890625, |
|
"logps/real": -327.92724609375, |
|
"loss": 0.3944, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -0.12083463370800018, |
|
"rewards/margins": 0.9464915990829468, |
|
"rewards/real": 0.8256568908691406, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.635974304068522e-08, |
|
"logits/generated": -2.9587833881378174, |
|
"logits/real": -2.9507949352264404, |
|
"logps/generated": -392.19256591796875, |
|
"logps/real": -346.1636962890625, |
|
"loss": 0.3626, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.1786222904920578, |
|
"rewards/margins": 1.2095777988433838, |
|
"rewards/real": 1.0309556722640991, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0706638115631692e-07, |
|
"logits/generated": -2.9543347358703613, |
|
"logits/real": -2.962299346923828, |
|
"logps/generated": -359.78009033203125, |
|
"logps/real": -339.92010498046875, |
|
"loss": 0.3373, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -0.21250668168067932, |
|
"rewards/margins": 1.3779737949371338, |
|
"rewards/real": 1.1654671430587769, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.177730192719486e-07, |
|
"logits/generated": -2.9344518184661865, |
|
"logits/real": -2.9429378509521484, |
|
"logps/generated": -400.2679748535156, |
|
"logps/real": -311.1820983886719, |
|
"loss": 0.28, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.47659602761268616, |
|
"rewards/margins": 1.5711917877197266, |
|
"rewards/real": 1.0945957899093628, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.284796573875803e-07, |
|
"logits/generated": -2.9682908058166504, |
|
"logits/real": -2.9808101654052734, |
|
"logps/generated": -382.69219970703125, |
|
"logps/real": -332.9756774902344, |
|
"loss": 0.261, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.8699382543563843, |
|
"rewards/margins": 1.8869482278823853, |
|
"rewards/real": 1.017009973526001, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3918629550321198e-07, |
|
"logits/generated": -2.950186014175415, |
|
"logits/real": -2.9397127628326416, |
|
"logps/generated": -397.447021484375, |
|
"logps/real": -314.9403076171875, |
|
"loss": 0.2336, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -1.2718974351882935, |
|
"rewards/margins": 2.2060093879699707, |
|
"rewards/real": 0.9341122508049011, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4989293361884367e-07, |
|
"logits/generated": -2.942518711090088, |
|
"logits/real": -2.9479198455810547, |
|
"logps/generated": -415.5738220214844, |
|
"logps/real": -311.505615234375, |
|
"loss": 0.2199, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -1.8576176166534424, |
|
"rewards/margins": 2.6989645957946777, |
|
"rewards/real": 0.8413470387458801, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6059957173447535e-07, |
|
"logits/generated": -2.934556007385254, |
|
"logits/real": -2.9276509284973145, |
|
"logps/generated": -408.669189453125, |
|
"logps/real": -376.3493957519531, |
|
"loss": 0.2065, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -1.4676240682601929, |
|
"rewards/margins": 2.4751994609832764, |
|
"rewards/real": 1.007575273513794, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7130620985010704e-07, |
|
"logits/generated": -2.9072623252868652, |
|
"logits/real": -2.911750078201294, |
|
"logps/generated": -457.24188232421875, |
|
"logps/real": -372.0235595703125, |
|
"loss": 0.173, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -2.4295992851257324, |
|
"rewards/margins": 3.2849929332733154, |
|
"rewards/real": 0.8553940057754517, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8201284796573874e-07, |
|
"logits/generated": -2.930424928665161, |
|
"logits/real": -2.917999744415283, |
|
"logps/generated": -365.8371887207031, |
|
"logps/real": -290.6273498535156, |
|
"loss": 0.1936, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -2.619870662689209, |
|
"rewards/margins": 2.9378836154937744, |
|
"rewards/real": 0.3180127739906311, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9271948608137044e-07, |
|
"logits/generated": -2.9163429737091064, |
|
"logits/real": -2.91850209236145, |
|
"logps/generated": -413.27288818359375, |
|
"logps/real": -344.4951171875, |
|
"loss": 0.1629, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -3.2495665550231934, |
|
"rewards/margins": 3.9354729652404785, |
|
"rewards/real": 0.6859063506126404, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0342612419700214e-07, |
|
"logits/generated": -2.914929151535034, |
|
"logits/real": -2.8999643325805664, |
|
"logps/generated": -455.07135009765625, |
|
"logps/real": -330.7913818359375, |
|
"loss": 0.1497, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -3.547805070877075, |
|
"rewards/margins": 3.8215222358703613, |
|
"rewards/real": 0.27371746301651, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.1413276231263384e-07, |
|
"logits/generated": -2.898618698120117, |
|
"logits/real": -2.8733317852020264, |
|
"logps/generated": -466.3199768066406, |
|
"logps/real": -360.9929504394531, |
|
"loss": 0.1471, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.219028472900391, |
|
"rewards/margins": 4.406793594360352, |
|
"rewards/real": 0.18776562809944153, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.248394004282655e-07, |
|
"logits/generated": -2.8917415142059326, |
|
"logits/real": -2.8755955696105957, |
|
"logps/generated": -410.87451171875, |
|
"logps/real": -356.9337463378906, |
|
"loss": 0.1496, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -4.3014631271362305, |
|
"rewards/margins": 4.1496405601501465, |
|
"rewards/real": -0.1518225222826004, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.355460385438972e-07, |
|
"logits/generated": -2.8925869464874268, |
|
"logits/real": -2.8660550117492676, |
|
"logps/generated": -421.72430419921875, |
|
"logps/real": -346.0505065917969, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -4.356566429138184, |
|
"rewards/margins": 4.137876987457275, |
|
"rewards/real": -0.21868903934955597, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.462526766595289e-07, |
|
"logits/generated": -2.891606569290161, |
|
"logits/real": -2.8733668327331543, |
|
"logps/generated": -414.7857360839844, |
|
"logps/real": -352.6590881347656, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.677205562591553, |
|
"rewards/margins": 4.315895080566406, |
|
"rewards/real": -0.3613104224205017, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.569593147751606e-07, |
|
"logits/generated": -2.8873989582061768, |
|
"logits/real": -2.8477070331573486, |
|
"logps/generated": -446.8787536621094, |
|
"logps/real": -355.8851013183594, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -4.631860256195068, |
|
"rewards/margins": 4.302509784698486, |
|
"rewards/real": -0.3293505311012268, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.676659528907923e-07, |
|
"logits/generated": -2.8453519344329834, |
|
"logits/real": -2.813788652420044, |
|
"logps/generated": -419.0545959472656, |
|
"logps/real": -332.58184814453125, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -4.903704643249512, |
|
"rewards/margins": 4.795269966125488, |
|
"rewards/real": -0.10843384265899658, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7837259100642395e-07, |
|
"logits/generated": -2.826523542404175, |
|
"logits/real": -2.7952752113342285, |
|
"logps/generated": -454.9879455566406, |
|
"logps/real": -370.6460876464844, |
|
"loss": 0.1294, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -5.665987491607666, |
|
"rewards/margins": 4.868983268737793, |
|
"rewards/real": -0.7970041632652283, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.890792291220557e-07, |
|
"logits/generated": -2.7900607585906982, |
|
"logits/real": -2.7720229625701904, |
|
"logps/generated": -477.99053955078125, |
|
"logps/real": -324.0495300292969, |
|
"loss": 0.138, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -6.587684631347656, |
|
"rewards/margins": 5.73649787902832, |
|
"rewards/real": -0.8511865735054016, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9978586723768735e-07, |
|
"logits/generated": -2.7956130504608154, |
|
"logits/real": -2.7502362728118896, |
|
"logps/generated": -441.34320068359375, |
|
"logps/real": -338.3302917480469, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -5.030495643615723, |
|
"rewards/margins": 4.927591800689697, |
|
"rewards/real": -0.10290361940860748, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1049250535331905e-07, |
|
"logits/generated": -2.776495933532715, |
|
"logits/real": -2.7354507446289062, |
|
"logps/generated": -443.36163330078125, |
|
"logps/real": -347.2231750488281, |
|
"loss": 0.122, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -5.4566240310668945, |
|
"rewards/margins": 5.132528781890869, |
|
"rewards/real": -0.32409486174583435, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.211991434689507e-07, |
|
"logits/generated": -2.705543041229248, |
|
"logits/real": -2.6740341186523438, |
|
"logps/generated": -402.8146667480469, |
|
"logps/real": -273.10748291015625, |
|
"loss": 0.0935, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -6.659946441650391, |
|
"rewards/margins": 5.823763847351074, |
|
"rewards/real": -0.8361822962760925, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.3190578158458244e-07, |
|
"logits/generated": -2.7414534091949463, |
|
"logits/real": -2.694044589996338, |
|
"logps/generated": -466.59375, |
|
"logps/real": -307.7261657714844, |
|
"loss": 0.1304, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.890405178070068, |
|
"rewards/margins": 6.506340980529785, |
|
"rewards/real": -0.3840644359588623, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.426124197002141e-07, |
|
"logits/generated": -2.7618134021759033, |
|
"logits/real": -2.7209994792938232, |
|
"logps/generated": -477.1549377441406, |
|
"logps/real": -336.30950927734375, |
|
"loss": 0.1194, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -7.347966194152832, |
|
"rewards/margins": 6.844850063323975, |
|
"rewards/real": -0.5031148791313171, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.533190578158458e-07, |
|
"logits/generated": -2.769962787628174, |
|
"logits/real": -2.6862077713012695, |
|
"logps/generated": -476.4065856933594, |
|
"logps/real": -356.8155517578125, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -6.444831848144531, |
|
"rewards/margins": 6.0999369621276855, |
|
"rewards/real": -0.3448948264122009, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.640256959314775e-07, |
|
"logits/generated": -2.730198383331299, |
|
"logits/real": -2.729130268096924, |
|
"logps/generated": -452.85626220703125, |
|
"logps/real": -316.28997802734375, |
|
"loss": 0.1254, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -6.9878082275390625, |
|
"rewards/margins": 6.373122215270996, |
|
"rewards/real": -0.6146861910820007, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.747323340471092e-07, |
|
"logits/generated": -2.700066566467285, |
|
"logits/real": -2.6724190711975098, |
|
"logps/generated": -458.28857421875, |
|
"logps/real": -329.33203125, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -5.48982048034668, |
|
"rewards/margins": 5.636635780334473, |
|
"rewards/real": 0.14681576192378998, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.854389721627409e-07, |
|
"logits/generated": -2.709381103515625, |
|
"logits/real": -2.6651294231414795, |
|
"logps/generated": -462.7777404785156, |
|
"logps/real": -367.77740478515625, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.926550388336182, |
|
"rewards/margins": 6.700352668762207, |
|
"rewards/real": -0.22619831562042236, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.961456102783726e-07, |
|
"logits/generated": -2.7424886226654053, |
|
"logits/real": -2.6503500938415527, |
|
"logps/generated": -489.62542724609375, |
|
"logps/real": -340.3368225097656, |
|
"loss": 0.1383, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -7.2433366775512695, |
|
"rewards/margins": 6.5567626953125, |
|
"rewards/real": -0.6865738034248352, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.068522483940043e-07, |
|
"logits/generated": -2.6518828868865967, |
|
"logits/real": -2.578998327255249, |
|
"logps/generated": -491.3388671875, |
|
"logps/real": -354.8604736328125, |
|
"loss": 0.1068, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -8.11082935333252, |
|
"rewards/margins": 6.844748020172119, |
|
"rewards/real": -1.2660824060440063, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.175588865096359e-07, |
|
"logits/generated": -2.6482059955596924, |
|
"logits/real": -2.593445301055908, |
|
"logps/generated": -458.67864990234375, |
|
"logps/real": -341.2268981933594, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.110146999359131, |
|
"rewards/margins": 6.503669738769531, |
|
"rewards/real": -0.6064783334732056, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.282655246252677e-07, |
|
"logits/generated": -2.6590518951416016, |
|
"logits/real": -2.589247703552246, |
|
"logps/generated": -485.0723571777344, |
|
"logps/real": -371.298828125, |
|
"loss": 0.1143, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -7.317061424255371, |
|
"rewards/margins": 6.85882568359375, |
|
"rewards/real": -0.4582356810569763, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.389721627408993e-07, |
|
"logits/generated": -2.657744884490967, |
|
"logits/real": -2.634624481201172, |
|
"logps/generated": -440.25396728515625, |
|
"logps/real": -309.36871337890625, |
|
"loss": 0.1418, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -6.629510402679443, |
|
"rewards/margins": 6.084707260131836, |
|
"rewards/real": -0.5448042154312134, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.49678800856531e-07, |
|
"logits/generated": -2.7457222938537598, |
|
"logits/real": -2.698697566986084, |
|
"logps/generated": -477.81475830078125, |
|
"logps/real": -359.94305419921875, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -5.93577241897583, |
|
"rewards/margins": 6.0500030517578125, |
|
"rewards/real": 0.11423077434301376, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.603854389721627e-07, |
|
"logits/generated": -2.7255027294158936, |
|
"logits/real": -2.6430881023406982, |
|
"logps/generated": -491.6402282714844, |
|
"logps/real": -329.3466796875, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.748249053955078, |
|
"rewards/margins": 6.983546257019043, |
|
"rewards/real": 0.23529770970344543, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.710920770877944e-07, |
|
"logits/generated": -2.727783679962158, |
|
"logits/real": -2.6885695457458496, |
|
"logps/generated": -479.18585205078125, |
|
"logps/real": -303.29132080078125, |
|
"loss": 0.1195, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -7.20239782333374, |
|
"rewards/margins": 6.955704689025879, |
|
"rewards/real": -0.24669349193572998, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.817987152034261e-07, |
|
"logits/generated": -2.704719305038452, |
|
"logits/real": -2.672463893890381, |
|
"logps/generated": -448.4974060058594, |
|
"logps/real": -331.91583251953125, |
|
"loss": 0.1325, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -6.272010326385498, |
|
"rewards/margins": 5.807042121887207, |
|
"rewards/real": -0.4649685323238373, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.925053533190578e-07, |
|
"logits/generated": -2.7187066078186035, |
|
"logits/real": -2.650846004486084, |
|
"logps/generated": -499.6753845214844, |
|
"logps/real": -356.98614501953125, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -8.36271858215332, |
|
"rewards/margins": 7.270302772521973, |
|
"rewards/real": -1.092416524887085, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996429421566293e-07, |
|
"logits/generated": -2.65824556350708, |
|
"logits/real": -2.5810179710388184, |
|
"logps/generated": -515.4708862304688, |
|
"logps/real": -335.44573974609375, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -9.19658088684082, |
|
"rewards/margins": 8.151226043701172, |
|
"rewards/real": -1.0453550815582275, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.98452749345394e-07, |
|
"logits/generated": -2.572777032852173, |
|
"logits/real": -2.529254913330078, |
|
"logps/generated": -461.7464294433594, |
|
"logps/real": -406.6604309082031, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -8.338438034057617, |
|
"rewards/margins": 7.123196601867676, |
|
"rewards/real": -1.2152409553527832, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.972625565341585e-07, |
|
"logits/generated": -2.5586600303649902, |
|
"logits/real": -2.4877161979675293, |
|
"logps/generated": -430.3533630371094, |
|
"logps/real": -323.3705749511719, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -7.9221014976501465, |
|
"rewards/margins": 7.004052639007568, |
|
"rewards/real": -0.9180490374565125, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.960723637229232e-07, |
|
"logits/generated": -2.5836546421051025, |
|
"logits/real": -2.552192211151123, |
|
"logps/generated": -456.425537109375, |
|
"logps/real": -340.4407958984375, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -8.413108825683594, |
|
"rewards/margins": 7.556717872619629, |
|
"rewards/real": -0.8563922047615051, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.948821709116876e-07, |
|
"logits/generated": -2.632378101348877, |
|
"logits/real": -2.586066246032715, |
|
"logps/generated": -491.17681884765625, |
|
"logps/real": -358.8891906738281, |
|
"loss": 0.1264, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -9.601828575134277, |
|
"rewards/margins": 8.468477249145508, |
|
"rewards/real": -1.1333516836166382, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.936919781004522e-07, |
|
"logits/generated": -2.6995837688446045, |
|
"logits/real": -2.663362979888916, |
|
"logps/generated": -473.89410400390625, |
|
"logps/real": -377.1138610839844, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -8.111103057861328, |
|
"rewards/margins": 7.439300537109375, |
|
"rewards/real": -0.6718028783798218, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925017852892168e-07, |
|
"logits/generated": -2.7533721923828125, |
|
"logits/real": -2.7160682678222656, |
|
"logps/generated": -463.5284729003906, |
|
"logps/real": -364.9224548339844, |
|
"loss": 0.1281, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -6.726442813873291, |
|
"rewards/margins": 7.018113136291504, |
|
"rewards/real": 0.29166945815086365, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.913115924779814e-07, |
|
"logits/generated": -2.692500352859497, |
|
"logits/real": -2.6972968578338623, |
|
"logps/generated": -485.6321716308594, |
|
"logps/real": -365.08599853515625, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -7.418447971343994, |
|
"rewards/margins": 6.375821113586426, |
|
"rewards/real": -1.0426270961761475, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.90121399666746e-07, |
|
"logits/generated": -2.6083171367645264, |
|
"logits/real": -2.5617499351501465, |
|
"logps/generated": -464.82916259765625, |
|
"logps/real": -343.98126220703125, |
|
"loss": 0.0843, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -7.705323696136475, |
|
"rewards/margins": 7.201271057128906, |
|
"rewards/real": -0.504052996635437, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.889312068555106e-07, |
|
"logits/generated": -2.628220558166504, |
|
"logits/real": -2.526280403137207, |
|
"logps/generated": -502.65032958984375, |
|
"logps/real": -313.36639404296875, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -9.855968475341797, |
|
"rewards/margins": 8.44670295715332, |
|
"rewards/real": -1.4092657566070557, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.877410140442752e-07, |
|
"logits/generated": -2.6273674964904785, |
|
"logits/real": -2.5923876762390137, |
|
"logps/generated": -443.7357482910156, |
|
"logps/real": -344.1656188964844, |
|
"loss": 0.1226, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -9.525670051574707, |
|
"rewards/margins": 7.985457420349121, |
|
"rewards/real": -1.5402114391326904, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.865508212330398e-07, |
|
"logits/generated": -2.734830379486084, |
|
"logits/real": -2.637960433959961, |
|
"logps/generated": -488.81781005859375, |
|
"logps/real": -388.02618408203125, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -8.961756706237793, |
|
"rewards/margins": 8.064661979675293, |
|
"rewards/real": -0.8970959782600403, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.853606284218044e-07, |
|
"logits/generated": -2.6835715770721436, |
|
"logits/real": -2.6238436698913574, |
|
"logps/generated": -435.009033203125, |
|
"logps/real": -347.2498474121094, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -7.5509772300720215, |
|
"rewards/margins": 7.190362453460693, |
|
"rewards/real": -0.3606160879135132, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.841704356105689e-07, |
|
"logits/generated": -2.667619466781616, |
|
"logits/real": -2.594552516937256, |
|
"logps/generated": -485.05596923828125, |
|
"logps/real": -357.52520751953125, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -8.562358856201172, |
|
"rewards/margins": 7.882147789001465, |
|
"rewards/real": -0.6802110075950623, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.829802427993334e-07, |
|
"logits/generated": -2.7254223823547363, |
|
"logits/real": -2.674651861190796, |
|
"logps/generated": -486.1986389160156, |
|
"logps/real": -366.41973876953125, |
|
"loss": 0.1319, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -9.714421272277832, |
|
"rewards/margins": 8.38983154296875, |
|
"rewards/real": -1.3245890140533447, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.81790049988098e-07, |
|
"logits/generated": -2.7993836402893066, |
|
"logits/real": -2.6992013454437256, |
|
"logps/generated": -477.964111328125, |
|
"logps/real": -352.30255126953125, |
|
"loss": 0.1332, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -9.314876556396484, |
|
"rewards/margins": 8.130788803100586, |
|
"rewards/real": -1.1840870380401611, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.805998571768626e-07, |
|
"logits/generated": -2.7570552825927734, |
|
"logits/real": -2.722883701324463, |
|
"logps/generated": -466.1041564941406, |
|
"logps/real": -327.46435546875, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -10.392984390258789, |
|
"rewards/margins": 8.88233757019043, |
|
"rewards/real": -1.510647177696228, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.794096643656272e-07, |
|
"logits/generated": -2.7757675647735596, |
|
"logits/real": -2.696953535079956, |
|
"logps/generated": -573.4818115234375, |
|
"logps/real": -451.28277587890625, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -10.725347518920898, |
|
"rewards/margins": 9.061750411987305, |
|
"rewards/real": -1.6635980606079102, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.782194715543918e-07, |
|
"logits/generated": -2.754331111907959, |
|
"logits/real": -2.675215482711792, |
|
"logps/generated": -536.6560668945312, |
|
"logps/real": -378.1103515625, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -9.196868896484375, |
|
"rewards/margins": 8.15418815612793, |
|
"rewards/real": -1.0426809787750244, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.770292787431564e-07, |
|
"logits/generated": -2.869988441467285, |
|
"logits/real": -2.751678943634033, |
|
"logps/generated": -494.60772705078125, |
|
"logps/real": -374.7413024902344, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -8.500930786132812, |
|
"rewards/margins": 7.580558776855469, |
|
"rewards/real": -0.920372486114502, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.7583908593192097e-07, |
|
"logits/generated": -2.8370208740234375, |
|
"logits/real": -2.6929588317871094, |
|
"logps/generated": -486.40533447265625, |
|
"logps/real": -372.7452087402344, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -9.398336410522461, |
|
"rewards/margins": 8.141416549682617, |
|
"rewards/real": -1.2569185495376587, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.746488931206855e-07, |
|
"logits/generated": -2.7330057621002197, |
|
"logits/real": -2.7403512001037598, |
|
"logps/generated": -512.094970703125, |
|
"logps/real": -335.0165100097656, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.172496795654297, |
|
"rewards/margins": 9.116006851196289, |
|
"rewards/real": -1.056490182876587, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.734587003094501e-07, |
|
"logits/generated": -2.8084769248962402, |
|
"logits/real": -2.7017087936401367, |
|
"logps/generated": -514.4985961914062, |
|
"logps/real": -370.76409912109375, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -9.885915756225586, |
|
"rewards/margins": 8.93104076385498, |
|
"rewards/real": -0.954875111579895, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.722685074982147e-07, |
|
"logits/generated": -2.7886388301849365, |
|
"logits/real": -2.7220773696899414, |
|
"logps/generated": -485.0414123535156, |
|
"logps/real": -330.4255065917969, |
|
"loss": 0.1032, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -10.818674087524414, |
|
"rewards/margins": 9.245843887329102, |
|
"rewards/real": -1.5728291273117065, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.710783146869793e-07, |
|
"logits/generated": -2.8421998023986816, |
|
"logits/real": -2.718574047088623, |
|
"logps/generated": -492.8663024902344, |
|
"logps/real": -378.52239990234375, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -10.291857719421387, |
|
"rewards/margins": 9.014776229858398, |
|
"rewards/real": -1.277081847190857, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.698881218757438e-07, |
|
"logits/generated": -2.7403712272644043, |
|
"logits/real": -2.693634271621704, |
|
"logps/generated": -450.5480041503906, |
|
"logps/real": -318.03704833984375, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -11.008447647094727, |
|
"rewards/margins": 8.986620903015137, |
|
"rewards/real": -2.021826982498169, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6869792906450845e-07, |
|
"logits/generated": -2.738825559616089, |
|
"logits/real": -2.6762800216674805, |
|
"logps/generated": -516.190673828125, |
|
"logps/real": -378.79364013671875, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -11.853250503540039, |
|
"rewards/margins": 9.828583717346191, |
|
"rewards/real": -2.0246665477752686, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.67507736253273e-07, |
|
"logits/generated": -2.772712230682373, |
|
"logits/real": -2.7906768321990967, |
|
"logps/generated": -503.3374938964844, |
|
"logps/real": -324.5931396484375, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -11.763754844665527, |
|
"rewards/margins": 10.069581985473633, |
|
"rewards/real": -1.694173812866211, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6631754344203763e-07, |
|
"logits/generated": -2.74733829498291, |
|
"logits/real": -2.7517824172973633, |
|
"logps/generated": -493.4088439941406, |
|
"logps/real": -340.09991455078125, |
|
"loss": 0.0989, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -10.747503280639648, |
|
"rewards/margins": 8.963977813720703, |
|
"rewards/real": -1.7835248708724976, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6512735063080217e-07, |
|
"logits/generated": -2.7257447242736816, |
|
"logits/real": -2.718722343444824, |
|
"logps/generated": -498.18255615234375, |
|
"logps/real": -334.10333251953125, |
|
"loss": 0.1144, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.353604316711426, |
|
"rewards/margins": 9.699551582336426, |
|
"rewards/real": -1.654052495956421, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6393715781956676e-07, |
|
"logits/generated": -2.699676990509033, |
|
"logits/real": -2.6231935024261475, |
|
"logps/generated": -525.2720336914062, |
|
"logps/real": -386.4446716308594, |
|
"loss": 0.0671, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.2306547164917, |
|
"rewards/margins": 9.587125778198242, |
|
"rewards/real": -0.6435292959213257, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6274696500833135e-07, |
|
"logits/generated": -2.677091360092163, |
|
"logits/real": -2.612697124481201, |
|
"logps/generated": -524.1741943359375, |
|
"logps/real": -354.40130615234375, |
|
"loss": 0.1003, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.090825080871582, |
|
"rewards/margins": 9.66313362121582, |
|
"rewards/real": -1.4276920557022095, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6155677219709594e-07, |
|
"logits/generated": -2.6755471229553223, |
|
"logits/real": -2.562119960784912, |
|
"logps/generated": -477.6163024902344, |
|
"logps/real": -376.70867919921875, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -10.81368350982666, |
|
"rewards/margins": 9.579755783081055, |
|
"rewards/real": -1.2339270114898682, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.603665793858605e-07, |
|
"logits/generated": -2.6505370140075684, |
|
"logits/real": -2.5655438899993896, |
|
"logps/generated": -453.52276611328125, |
|
"logps/real": -346.87591552734375, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.680634498596191, |
|
"rewards/margins": 8.618191719055176, |
|
"rewards/real": -2.062441825866699, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5917638657462507e-07, |
|
"logits/generated": -2.5472962856292725, |
|
"logits/real": -2.4991610050201416, |
|
"logps/generated": -514.5006103515625, |
|
"logps/real": -402.99920654296875, |
|
"loss": 0.1054, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -10.380758285522461, |
|
"rewards/margins": 8.436528205871582, |
|
"rewards/real": -1.9442304372787476, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5798619376338966e-07, |
|
"logits/generated": -2.5719590187072754, |
|
"logits/real": -2.519853115081787, |
|
"logps/generated": -456.893310546875, |
|
"logps/real": -343.7867431640625, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.286421775817871, |
|
"rewards/margins": 8.027682304382324, |
|
"rewards/real": -2.258739471435547, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.567960009521542e-07, |
|
"logits/generated": -2.5605781078338623, |
|
"logits/real": -2.501222610473633, |
|
"logps/generated": -433.1905212402344, |
|
"logps/real": -311.0120544433594, |
|
"loss": 0.0988, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -10.496920585632324, |
|
"rewards/margins": 8.129188537597656, |
|
"rewards/real": -2.367732048034668, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5560580814091884e-07, |
|
"logits/generated": -2.565680503845215, |
|
"logits/real": -2.4589760303497314, |
|
"logps/generated": -498.48583984375, |
|
"logps/real": -353.51470947265625, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -12.815507888793945, |
|
"rewards/margins": 10.009346961975098, |
|
"rewards/real": -2.8061606884002686, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.5441561532968337e-07, |
|
"logits/generated": -2.524672746658325, |
|
"logits/real": -2.4803435802459717, |
|
"logps/generated": -510.71533203125, |
|
"logps/real": -395.97503662109375, |
|
"loss": 0.1072, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.569852828979492, |
|
"rewards/margins": 8.171304702758789, |
|
"rewards/real": -2.398545742034912, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.5322542251844796e-07, |
|
"logits/generated": -2.4687283039093018, |
|
"logits/real": -2.4027516841888428, |
|
"logps/generated": -550.343017578125, |
|
"logps/real": -387.80523681640625, |
|
"loss": 0.0655, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.694091796875, |
|
"rewards/margins": 10.239914894104004, |
|
"rewards/real": -3.454176425933838, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5203522970721255e-07, |
|
"logits/generated": -2.4605631828308105, |
|
"logits/real": -2.317789077758789, |
|
"logps/generated": -515.8772583007812, |
|
"logps/real": -373.03094482421875, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -13.49272346496582, |
|
"rewards/margins": 9.03429126739502, |
|
"rewards/real": -4.458432197570801, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5084503689597714e-07, |
|
"logits/generated": -2.5647082328796387, |
|
"logits/real": -2.478529453277588, |
|
"logps/generated": -497.2862854003906, |
|
"logps/real": -409.93402099609375, |
|
"loss": 0.1512, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -10.405123710632324, |
|
"rewards/margins": 8.666096687316895, |
|
"rewards/real": -1.7390273809432983, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.496548440847417e-07, |
|
"logits/generated": -2.4828336238861084, |
|
"logits/real": -2.413994550704956, |
|
"logps/generated": -457.74127197265625, |
|
"logps/real": -331.61566162109375, |
|
"loss": 0.107, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.047932624816895, |
|
"rewards/margins": 8.361185073852539, |
|
"rewards/real": -1.6867475509643555, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.484646512735063e-07, |
|
"logits/generated": -2.5370724201202393, |
|
"logits/real": -2.4653468132019043, |
|
"logps/generated": -478.68011474609375, |
|
"logps/real": -370.4181213378906, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -10.04428768157959, |
|
"rewards/margins": 8.153284072875977, |
|
"rewards/real": -1.8910045623779297, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4727445846227086e-07, |
|
"logits/generated": -2.500948667526245, |
|
"logits/real": -2.420001745223999, |
|
"logps/generated": -470.17803955078125, |
|
"logps/real": -346.50494384765625, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.731805801391602, |
|
"rewards/margins": 8.218327522277832, |
|
"rewards/real": -1.5134775638580322, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4608426565103545e-07, |
|
"logits/generated": -2.4795870780944824, |
|
"logits/real": -2.42265248298645, |
|
"logps/generated": -478.30303955078125, |
|
"logps/real": -338.5771789550781, |
|
"loss": 0.1485, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.962733268737793, |
|
"rewards/margins": 8.512125968933105, |
|
"rewards/real": -1.4506077766418457, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4489407283980004e-07, |
|
"logits/generated": -2.4491372108459473, |
|
"logits/real": -2.4447901248931885, |
|
"logps/generated": -483.40020751953125, |
|
"logps/real": -362.98577880859375, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -11.003097534179688, |
|
"rewards/margins": 9.003512382507324, |
|
"rewards/real": -1.9995838403701782, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.437038800285646e-07, |
|
"logits/generated": -2.459510326385498, |
|
"logits/real": -2.4044106006622314, |
|
"logps/generated": -475.15625, |
|
"logps/real": -334.83526611328125, |
|
"loss": 0.1292, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -9.873337745666504, |
|
"rewards/margins": 8.043792724609375, |
|
"rewards/real": -1.829545021057129, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4251368721732916e-07, |
|
"logits/generated": -2.455169200897217, |
|
"logits/real": -2.3593177795410156, |
|
"logps/generated": -560.1038208007812, |
|
"logps/real": -385.29278564453125, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.663183212280273, |
|
"rewards/margins": 9.093454360961914, |
|
"rewards/real": -2.569728136062622, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.413234944060938e-07, |
|
"logits/generated": -2.4482192993164062, |
|
"logits/real": -2.3591175079345703, |
|
"logps/generated": -466.2801208496094, |
|
"logps/real": -324.6377868652344, |
|
"loss": 0.138, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.816349029541016, |
|
"rewards/margins": 8.520627975463867, |
|
"rewards/real": -2.2957208156585693, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4013330159485834e-07, |
|
"logits/generated": -2.435135841369629, |
|
"logits/real": -2.4362385272979736, |
|
"logps/generated": -432.5603942871094, |
|
"logps/real": -325.52471923828125, |
|
"loss": 0.1216, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.173177719116211, |
|
"rewards/margins": 7.554961204528809, |
|
"rewards/real": -2.6182148456573486, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3894310878362293e-07, |
|
"logits/generated": -2.3807005882263184, |
|
"logits/real": -2.354038715362549, |
|
"logps/generated": -488.814208984375, |
|
"logps/real": -333.68096923828125, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.279688835144043, |
|
"rewards/margins": 8.787554740905762, |
|
"rewards/real": -2.4921340942382812, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.377529159723875e-07, |
|
"logits/generated": -2.4375739097595215, |
|
"logits/real": -2.33906888961792, |
|
"logps/generated": -506.0091247558594, |
|
"logps/real": -340.0153503417969, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -11.742327690124512, |
|
"rewards/margins": 9.213804244995117, |
|
"rewards/real": -2.5285239219665527, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.365627231611521e-07, |
|
"logits/generated": -2.373945951461792, |
|
"logits/real": -2.355705738067627, |
|
"logps/generated": -559.3948364257812, |
|
"logps/real": -378.2243957519531, |
|
"loss": 0.1036, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -13.373077392578125, |
|
"rewards/margins": 11.117111206054688, |
|
"rewards/real": -2.255967617034912, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3537253034991665e-07, |
|
"logits/generated": -2.3554375171661377, |
|
"logits/real": -2.2880444526672363, |
|
"logps/generated": -538.6488037109375, |
|
"logps/real": -424.7191467285156, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -12.977459907531738, |
|
"rewards/margins": 9.986202239990234, |
|
"rewards/real": -2.9912569522857666, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3418233753868124e-07, |
|
"logits/generated": -2.3351311683654785, |
|
"logits/real": -2.3256137371063232, |
|
"logps/generated": -551.7449340820312, |
|
"logps/real": -456.4244079589844, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -11.744857788085938, |
|
"rewards/margins": 10.64583969116211, |
|
"rewards/real": -1.0990195274353027, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3299214472744583e-07, |
|
"logits/generated": -2.449162006378174, |
|
"logits/real": -2.3387961387634277, |
|
"logps/generated": -493.4405822753906, |
|
"logps/real": -366.27191162109375, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -11.17166519165039, |
|
"rewards/margins": 9.858702659606934, |
|
"rewards/real": -1.3129618167877197, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.3180195191621036e-07, |
|
"logits/generated": -2.372615337371826, |
|
"logits/real": -2.357564687728882, |
|
"logps/generated": -506.31890869140625, |
|
"logps/real": -382.11859130859375, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -10.259264945983887, |
|
"rewards/margins": 8.731651306152344, |
|
"rewards/real": -1.5276130437850952, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.30611759104975e-07, |
|
"logits/generated": -2.4573209285736084, |
|
"logits/real": -2.3666205406188965, |
|
"logps/generated": -515.4904174804688, |
|
"logps/real": -379.75238037109375, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -12.583343505859375, |
|
"rewards/margins": 10.160721778869629, |
|
"rewards/real": -2.422621011734009, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2942156629373954e-07, |
|
"logits/generated": -2.316649913787842, |
|
"logits/real": -2.2139687538146973, |
|
"logps/generated": -532.4139404296875, |
|
"logps/real": -382.7889404296875, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -13.652563095092773, |
|
"rewards/margins": 9.413002967834473, |
|
"rewards/real": -4.239560604095459, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2823137348250413e-07, |
|
"logits/generated": -2.2381844520568848, |
|
"logits/real": -2.1988272666931152, |
|
"logps/generated": -531.000244140625, |
|
"logps/real": -389.1220397949219, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -13.427118301391602, |
|
"rewards/margins": 10.272978782653809, |
|
"rewards/real": -3.1541380882263184, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.270411806712687e-07, |
|
"logits/generated": -2.3720908164978027, |
|
"logits/real": -2.348632335662842, |
|
"logps/generated": -521.8538818359375, |
|
"logps/real": -375.3919677734375, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.00294017791748, |
|
"rewards/margins": 10.072427749633789, |
|
"rewards/real": -2.930511474609375, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.258509878600333e-07, |
|
"logits/generated": -2.3700733184814453, |
|
"logits/real": -2.3018269538879395, |
|
"logps/generated": -437.77056884765625, |
|
"logps/real": -336.5913391113281, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -10.549997329711914, |
|
"rewards/margins": 8.99598217010498, |
|
"rewards/real": -1.5540151596069336, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2466079504879785e-07, |
|
"logits/generated": -2.4425048828125, |
|
"logits/real": -2.3580105304718018, |
|
"logps/generated": -480.294677734375, |
|
"logps/real": -348.6474914550781, |
|
"loss": 0.1287, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -10.765533447265625, |
|
"rewards/margins": 8.889466285705566, |
|
"rewards/real": -1.876068115234375, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.234706022375625e-07, |
|
"logits/generated": -2.4600212574005127, |
|
"logits/real": -2.46691632270813, |
|
"logps/generated": -464.4510803222656, |
|
"logps/real": -327.79296875, |
|
"loss": 0.1345, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -10.468949317932129, |
|
"rewards/margins": 8.587320327758789, |
|
"rewards/real": -1.8816286325454712, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2228040942632703e-07, |
|
"logits/generated": -2.4465115070343018, |
|
"logits/real": -2.4585764408111572, |
|
"logps/generated": -460.46795654296875, |
|
"logps/real": -305.36920166015625, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -9.80966854095459, |
|
"rewards/margins": 7.955733299255371, |
|
"rewards/real": -1.8539355993270874, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.210902166150916e-07, |
|
"logits/generated": -2.297677516937256, |
|
"logits/real": -2.2483174800872803, |
|
"logps/generated": -491.7379455566406, |
|
"logps/real": -358.0972900390625, |
|
"loss": 0.1178, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -11.660999298095703, |
|
"rewards/margins": 9.423800468444824, |
|
"rewards/real": -2.237199306488037, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.199000238038562e-07, |
|
"logits/generated": -2.337754249572754, |
|
"logits/real": -2.2701668739318848, |
|
"logps/generated": -540.6451416015625, |
|
"logps/real": -358.3714294433594, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -12.581125259399414, |
|
"rewards/margins": 10.22091293334961, |
|
"rewards/real": -2.3602118492126465, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.187098309926208e-07, |
|
"logits/generated": -2.328687906265259, |
|
"logits/real": -2.2642407417297363, |
|
"logps/generated": -510.14031982421875, |
|
"logps/real": -395.0322570800781, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.54482650756836, |
|
"rewards/margins": 9.52647590637207, |
|
"rewards/real": -2.0183498859405518, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1751963818138534e-07, |
|
"logits/generated": -2.2651526927948, |
|
"logits/real": -2.2788243293762207, |
|
"logps/generated": -504.93603515625, |
|
"logps/real": -315.02191162109375, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -13.738065719604492, |
|
"rewards/margins": 10.608281135559082, |
|
"rewards/real": -3.129784345626831, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1632944537015e-07, |
|
"logits/generated": -2.2317709922790527, |
|
"logits/real": -2.24200177192688, |
|
"logps/generated": -509.27069091796875, |
|
"logps/real": -339.88775634765625, |
|
"loss": 0.1091, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -13.493547439575195, |
|
"rewards/margins": 10.474954605102539, |
|
"rewards/real": -3.0185914039611816, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.151392525589145e-07, |
|
"logits/generated": -2.1863222122192383, |
|
"logits/real": -2.1978249549865723, |
|
"logps/generated": -579.1888427734375, |
|
"logps/real": -406.6951904296875, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -14.355679512023926, |
|
"rewards/margins": 11.128015518188477, |
|
"rewards/real": -3.227665424346924, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.139490597476791e-07, |
|
"logits/generated": -2.2855031490325928, |
|
"logits/real": -2.2533984184265137, |
|
"logps/generated": -515.9293823242188, |
|
"logps/real": -389.2561340332031, |
|
"loss": 0.098, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -12.194608688354492, |
|
"rewards/margins": 9.927949905395508, |
|
"rewards/real": -2.2666568756103516, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.127588669364437e-07, |
|
"logits/generated": -2.2684109210968018, |
|
"logits/real": -2.2653377056121826, |
|
"logps/generated": -492.51708984375, |
|
"logps/real": -367.3876953125, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -12.415742874145508, |
|
"rewards/margins": 8.819601058959961, |
|
"rewards/real": -3.5961413383483887, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.115686741252083e-07, |
|
"logits/generated": -2.3267722129821777, |
|
"logits/real": -2.3436107635498047, |
|
"logps/generated": -532.1981201171875, |
|
"logps/real": -420.6949157714844, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -11.921398162841797, |
|
"rewards/margins": 9.262883186340332, |
|
"rewards/real": -2.6585140228271484, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.103784813139728e-07, |
|
"logits/generated": -2.3338916301727295, |
|
"logits/real": -2.334585666656494, |
|
"logps/generated": -532.9281616210938, |
|
"logps/real": -377.1614990234375, |
|
"loss": 0.1003, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -12.63199234008789, |
|
"rewards/margins": 9.967310905456543, |
|
"rewards/real": -2.6646811962127686, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.091882885027374e-07, |
|
"logits/generated": -2.4137134552001953, |
|
"logits/real": -2.4191126823425293, |
|
"logps/generated": -543.4132080078125, |
|
"logps/real": -409.27313232421875, |
|
"loss": 0.0698, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -11.591083526611328, |
|
"rewards/margins": 9.39649772644043, |
|
"rewards/real": -2.1945860385894775, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.07998095691502e-07, |
|
"logits/generated": -2.4814438819885254, |
|
"logits/real": -2.460033893585205, |
|
"logps/generated": -558.88427734375, |
|
"logps/real": -361.6279296875, |
|
"loss": 0.0532, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.638830184936523, |
|
"rewards/margins": 10.296621322631836, |
|
"rewards/real": -2.342207670211792, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0680790288026654e-07, |
|
"logits/generated": -2.347885847091675, |
|
"logits/real": -2.360043525695801, |
|
"logps/generated": -555.4683837890625, |
|
"logps/real": -371.6921081542969, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -12.670600891113281, |
|
"rewards/margins": 10.81347370147705, |
|
"rewards/real": -1.857126235961914, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.056177100690312e-07, |
|
"logits/generated": -2.285675048828125, |
|
"logits/real": -2.264207363128662, |
|
"logps/generated": -540.85546875, |
|
"logps/real": -306.2080078125, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.267425537109375, |
|
"rewards/margins": 10.254603385925293, |
|
"rewards/real": -3.0128207206726074, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.044275172577957e-07, |
|
"logits/generated": -2.256348133087158, |
|
"logits/real": -2.257491111755371, |
|
"logps/generated": -484.46746826171875, |
|
"logps/real": -345.3021240234375, |
|
"loss": 0.0676, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.33599853515625, |
|
"rewards/margins": 10.659614562988281, |
|
"rewards/real": -2.6763834953308105, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0323732444656036e-07, |
|
"logits/generated": -2.310236930847168, |
|
"logits/real": -2.3040318489074707, |
|
"logps/generated": -652.2039794921875, |
|
"logps/real": -504.1841735839844, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -14.383448600769043, |
|
"rewards/margins": 11.50027084350586, |
|
"rewards/real": -2.8831779956817627, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.020471316353249e-07, |
|
"logits/generated": -2.356076717376709, |
|
"logits/real": -2.3591604232788086, |
|
"logps/generated": -550.447265625, |
|
"logps/real": -353.8184509277344, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -14.289446830749512, |
|
"rewards/margins": 11.606106758117676, |
|
"rewards/real": -2.6833410263061523, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.008569388240895e-07, |
|
"logits/generated": -2.360548257827759, |
|
"logits/real": -2.312885284423828, |
|
"logps/generated": -535.8861083984375, |
|
"logps/real": -367.6419372558594, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -13.502528190612793, |
|
"rewards/margins": 10.530913352966309, |
|
"rewards/real": -2.971615791320801, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.996667460128541e-07, |
|
"logits/generated": -2.3651747703552246, |
|
"logits/real": -2.385131359100342, |
|
"logps/generated": -576.5161743164062, |
|
"logps/real": -374.0749206542969, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -14.620333671569824, |
|
"rewards/margins": 11.384607315063477, |
|
"rewards/real": -3.2357261180877686, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9847655320161867e-07, |
|
"logits/generated": -2.3751935958862305, |
|
"logits/real": -2.4091992378234863, |
|
"logps/generated": -528.142333984375, |
|
"logps/real": -353.9475402832031, |
|
"loss": 0.1308, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -13.35066032409668, |
|
"rewards/margins": 11.197701454162598, |
|
"rewards/real": -2.1529600620269775, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.972863603903832e-07, |
|
"logits/generated": -2.3546595573425293, |
|
"logits/real": -2.3350775241851807, |
|
"logps/generated": -561.2341918945312, |
|
"logps/real": -414.78094482421875, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.160077095031738, |
|
"rewards/margins": 10.612679481506348, |
|
"rewards/real": -2.5473971366882324, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9609616757914784e-07, |
|
"logits/generated": -2.3799915313720703, |
|
"logits/real": -2.4229447841644287, |
|
"logps/generated": -529.3641357421875, |
|
"logps/real": -367.49676513671875, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.449444770812988, |
|
"rewards/margins": 10.857105255126953, |
|
"rewards/real": -2.5923383235931396, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949059747679124e-07, |
|
"logits/generated": -2.3573193550109863, |
|
"logits/real": -2.361992359161377, |
|
"logps/generated": -541.9324340820312, |
|
"logps/real": -408.26885986328125, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -13.906323432922363, |
|
"rewards/margins": 10.026899337768555, |
|
"rewards/real": -3.8794217109680176, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9371578195667697e-07, |
|
"logits/generated": -2.3486838340759277, |
|
"logits/real": -2.3597350120544434, |
|
"logps/generated": -507.2032775878906, |
|
"logps/real": -343.7771301269531, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -14.98302936553955, |
|
"rewards/margins": 11.527512550354004, |
|
"rewards/real": -3.4555180072784424, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9252558914544156e-07, |
|
"logits/generated": -2.473050594329834, |
|
"logits/real": -2.515122652053833, |
|
"logps/generated": -507.3802185058594, |
|
"logps/real": -396.953125, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -13.604555130004883, |
|
"rewards/margins": 11.562819480895996, |
|
"rewards/real": -2.041734218597412, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9133539633420615e-07, |
|
"logits/generated": -2.3934197425842285, |
|
"logits/real": -2.3536086082458496, |
|
"logps/generated": -532.2623901367188, |
|
"logps/real": -370.50146484375, |
|
"loss": 0.1024, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -12.107502937316895, |
|
"rewards/margins": 9.64056396484375, |
|
"rewards/real": -2.4669392108917236, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.901452035229707e-07, |
|
"logits/generated": -2.3607726097106934, |
|
"logits/real": -2.3698906898498535, |
|
"logps/generated": -516.7948608398438, |
|
"logps/real": -358.2063293457031, |
|
"loss": 0.0738, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -12.62265396118164, |
|
"rewards/margins": 11.326190948486328, |
|
"rewards/real": -1.2964636087417603, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8895501071173533e-07, |
|
"logits/generated": -2.3465137481689453, |
|
"logits/real": -2.3854851722717285, |
|
"logps/generated": -521.5613403320312, |
|
"logps/real": -320.7744140625, |
|
"loss": 0.0709, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -12.96727180480957, |
|
"rewards/margins": 10.701366424560547, |
|
"rewards/real": -2.2659034729003906, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8776481790049987e-07, |
|
"logits/generated": -2.39072847366333, |
|
"logits/real": -2.3972249031066895, |
|
"logps/generated": -533.3491821289062, |
|
"logps/real": -341.78558349609375, |
|
"loss": 0.0688, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -14.228726387023926, |
|
"rewards/margins": 12.107359886169434, |
|
"rewards/real": -2.1213667392730713, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.865746250892644e-07, |
|
"logits/generated": -2.2480862140655518, |
|
"logits/real": -2.340782642364502, |
|
"logps/generated": -566.9257202148438, |
|
"logps/real": -318.01495361328125, |
|
"loss": 0.0588, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -15.223588943481445, |
|
"rewards/margins": 12.4530029296875, |
|
"rewards/real": -2.77058482170105, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538443227802905e-07, |
|
"logits/generated": -2.261993169784546, |
|
"logits/real": -2.2912187576293945, |
|
"logps/generated": -497.12091064453125, |
|
"logps/real": -354.21795654296875, |
|
"loss": 0.1078, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -13.343725204467773, |
|
"rewards/margins": 10.44404411315918, |
|
"rewards/real": -2.8996803760528564, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.841942394667936e-07, |
|
"logits/generated": -2.333307981491089, |
|
"logits/real": -2.3561549186706543, |
|
"logps/generated": -516.3057250976562, |
|
"logps/real": -322.0869140625, |
|
"loss": 0.0865, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -13.671916007995605, |
|
"rewards/margins": 10.728243827819824, |
|
"rewards/real": -2.943671226501465, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.8300404665555817e-07, |
|
"logits/generated": -2.332399845123291, |
|
"logits/real": -2.285750389099121, |
|
"logps/generated": -566.4459228515625, |
|
"logps/real": -421.2608337402344, |
|
"loss": 0.068, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.901707649230957, |
|
"rewards/margins": 11.070039749145508, |
|
"rewards/real": -2.8316686153411865, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8181385384432276e-07, |
|
"logits/generated": -2.232886791229248, |
|
"logits/real": -2.280543804168701, |
|
"logps/generated": -488.8804626464844, |
|
"logps/real": -350.1278991699219, |
|
"loss": 0.0911, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -12.198708534240723, |
|
"rewards/margins": 9.276993751525879, |
|
"rewards/real": -2.92171573638916, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8062366103308735e-07, |
|
"logits/generated": -2.254783868789673, |
|
"logits/real": -2.3048110008239746, |
|
"logps/generated": -550.8900146484375, |
|
"logps/real": -392.82562255859375, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -13.891596794128418, |
|
"rewards/margins": 11.135417938232422, |
|
"rewards/real": -2.7561793327331543, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.794334682218519e-07, |
|
"logits/generated": -2.182992458343506, |
|
"logits/real": -2.2217135429382324, |
|
"logps/generated": -545.983154296875, |
|
"logps/real": -389.5903015136719, |
|
"loss": 0.0698, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -14.36902141571045, |
|
"rewards/margins": 11.656338691711426, |
|
"rewards/real": -2.712681770324707, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.7824327541061653e-07, |
|
"logits/generated": -2.213576555252075, |
|
"logits/real": -2.1897943019866943, |
|
"logps/generated": -545.77490234375, |
|
"logps/real": -393.1239013671875, |
|
"loss": 0.1254, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -13.385502815246582, |
|
"rewards/margins": 10.636938095092773, |
|
"rewards/real": -2.748565673828125, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.7705308259938107e-07, |
|
"logits/generated": -2.2783615589141846, |
|
"logits/real": -2.3009047508239746, |
|
"logps/generated": -476.43389892578125, |
|
"logps/real": -353.0292053222656, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -12.137995719909668, |
|
"rewards/margins": 9.004945755004883, |
|
"rewards/real": -3.1330504417419434, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7586288978814566e-07, |
|
"logits/generated": -2.351835250854492, |
|
"logits/real": -2.3665931224823, |
|
"logps/generated": -560.801025390625, |
|
"logps/real": -415.95599365234375, |
|
"loss": 0.0882, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -13.19482707977295, |
|
"rewards/margins": 11.202047348022461, |
|
"rewards/real": -1.992780327796936, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7467269697691025e-07, |
|
"logits/generated": -2.382901430130005, |
|
"logits/real": -2.393000841140747, |
|
"logps/generated": -505.42071533203125, |
|
"logps/real": -403.2711181640625, |
|
"loss": 0.1249, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -11.522603988647461, |
|
"rewards/margins": 9.553156852722168, |
|
"rewards/real": -1.9694464206695557, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7348250416567484e-07, |
|
"logits/generated": -2.3661327362060547, |
|
"logits/real": -2.366842746734619, |
|
"logps/generated": -558.5916748046875, |
|
"logps/real": -373.6806640625, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -11.855193138122559, |
|
"rewards/margins": 10.633702278137207, |
|
"rewards/real": -1.2214914560317993, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.722923113544394e-07, |
|
"logits/generated": -2.339836597442627, |
|
"logits/real": -2.34541654586792, |
|
"logps/generated": -536.8855590820312, |
|
"logps/real": -369.8499450683594, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.604657173156738, |
|
"rewards/margins": 10.141790390014648, |
|
"rewards/real": -1.462865948677063, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.71102118543204e-07, |
|
"logits/generated": -2.3748319149017334, |
|
"logits/real": -2.347130298614502, |
|
"logps/generated": -509.50506591796875, |
|
"logps/real": -383.81207275390625, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -10.870028495788574, |
|
"rewards/margins": 9.974245071411133, |
|
"rewards/real": -0.8957852125167847, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.6991192573196855e-07, |
|
"logits/generated": -2.3040242195129395, |
|
"logits/real": -2.3185038566589355, |
|
"logps/generated": -511.0401916503906, |
|
"logps/real": -363.1226501464844, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.427467346191406, |
|
"rewards/margins": 11.945287704467773, |
|
"rewards/real": -1.4821794033050537, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6872173292073314e-07, |
|
"logits/generated": -2.3647196292877197, |
|
"logits/real": -2.3546738624572754, |
|
"logps/generated": -562.1310424804688, |
|
"logps/real": -375.43316650390625, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.149296760559082, |
|
"rewards/margins": 12.207903861999512, |
|
"rewards/real": -1.9413917064666748, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.6753154010949773e-07, |
|
"logits/generated": -2.301741123199463, |
|
"logits/real": -2.3652164936065674, |
|
"logps/generated": -549.0155639648438, |
|
"logps/real": -403.6900329589844, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.563409805297852, |
|
"rewards/margins": 14.276460647583008, |
|
"rewards/real": -1.2869514226913452, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.663413472982623e-07, |
|
"logits/generated": -2.261230707168579, |
|
"logits/real": -2.3311500549316406, |
|
"logps/generated": -563.3414916992188, |
|
"logps/real": -348.5225524902344, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.899667739868164, |
|
"rewards/margins": 13.73414421081543, |
|
"rewards/real": -2.1655211448669434, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6515115448702686e-07, |
|
"logits/generated": -2.372976779937744, |
|
"logits/real": -2.400886058807373, |
|
"logps/generated": -569.90673828125, |
|
"logps/real": -390.90704345703125, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.986291885375977, |
|
"rewards/margins": 13.204297065734863, |
|
"rewards/real": -1.7819948196411133, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.639609616757915e-07, |
|
"logits/generated": -2.2624919414520264, |
|
"logits/real": -2.3082499504089355, |
|
"logps/generated": -564.4075317382812, |
|
"logps/real": -394.94287109375, |
|
"loss": 0.017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.597874641418457, |
|
"rewards/margins": 13.840059280395508, |
|
"rewards/real": -1.7578150033950806, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6277076886455604e-07, |
|
"logits/generated": -2.1787052154541016, |
|
"logits/real": -2.215146780014038, |
|
"logps/generated": -520.8050537109375, |
|
"logps/real": -337.3519287109375, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.893610954284668, |
|
"rewards/margins": 13.641830444335938, |
|
"rewards/real": -2.2517800331115723, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.615805760533206e-07, |
|
"logits/generated": -2.227945566177368, |
|
"logits/real": -2.295309543609619, |
|
"logps/generated": -569.5865478515625, |
|
"logps/real": -361.73980712890625, |
|
"loss": 0.0228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.241802215576172, |
|
"rewards/margins": 14.572728157043457, |
|
"rewards/real": -1.669075608253479, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.603903832420852e-07, |
|
"logits/generated": -2.281085968017578, |
|
"logits/real": -2.32185697555542, |
|
"logps/generated": -550.8215942382812, |
|
"logps/real": -361.8749694824219, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.377789497375488, |
|
"rewards/margins": 12.968729972839355, |
|
"rewards/real": -2.4090590476989746, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5920019043084976e-07, |
|
"logits/generated": -2.36209774017334, |
|
"logits/real": -2.285122871398926, |
|
"logps/generated": -528.7139892578125, |
|
"logps/real": -321.8282775878906, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.81471061706543, |
|
"rewards/margins": 13.131657600402832, |
|
"rewards/real": -2.683054208755493, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5800999761961435e-07, |
|
"logits/generated": -2.2304556369781494, |
|
"logits/real": -2.2633605003356934, |
|
"logps/generated": -569.7330932617188, |
|
"logps/real": -362.989501953125, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -16.337574005126953, |
|
"rewards/margins": 14.290669441223145, |
|
"rewards/real": -2.046905994415283, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5681980480837893e-07, |
|
"logits/generated": -2.1861634254455566, |
|
"logits/real": -2.213642120361328, |
|
"logps/generated": -559.1907348632812, |
|
"logps/real": -386.95330810546875, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.114718437194824, |
|
"rewards/margins": 12.973544120788574, |
|
"rewards/real": -2.1411757469177246, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.556296119971435e-07, |
|
"logits/generated": -2.160221815109253, |
|
"logits/real": -2.219512701034546, |
|
"logps/generated": -548.2822265625, |
|
"logps/real": -373.2680358886719, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.96226692199707, |
|
"rewards/margins": 15.323366165161133, |
|
"rewards/real": -2.6389002799987793, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5443941918590806e-07, |
|
"logits/generated": -2.237609386444092, |
|
"logits/real": -2.2286012172698975, |
|
"logps/generated": -572.4905395507812, |
|
"logps/real": -334.9125061035156, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.150920867919922, |
|
"rewards/margins": 14.536825180053711, |
|
"rewards/real": -3.614098072052002, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.532492263746727e-07, |
|
"logits/generated": -2.184624195098877, |
|
"logits/real": -2.211127281188965, |
|
"logps/generated": -608.9227905273438, |
|
"logps/real": -361.32684326171875, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.036636352539062, |
|
"rewards/margins": 16.421756744384766, |
|
"rewards/real": -2.6148791313171387, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.5205903356343724e-07, |
|
"logits/generated": -2.1313374042510986, |
|
"logits/real": -2.173050880432129, |
|
"logps/generated": -598.5973510742188, |
|
"logps/real": -355.4421081542969, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.13778305053711, |
|
"rewards/margins": 16.43929672241211, |
|
"rewards/real": -3.6984870433807373, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.5086884075220183e-07, |
|
"logits/generated": -2.087921619415283, |
|
"logits/real": -2.1425328254699707, |
|
"logps/generated": -566.9527587890625, |
|
"logps/real": -332.5347595214844, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.658199310302734, |
|
"rewards/margins": 15.989227294921875, |
|
"rewards/real": -3.6689727306365967, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.496786479409664e-07, |
|
"logits/generated": -2.07328200340271, |
|
"logits/real": -2.1263468265533447, |
|
"logps/generated": -567.5440063476562, |
|
"logps/real": -416.94256591796875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.27109146118164, |
|
"rewards/margins": 15.73029899597168, |
|
"rewards/real": -3.540794849395752, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.48488455129731e-07, |
|
"logits/generated": -2.0371181964874268, |
|
"logits/real": -2.082137107849121, |
|
"logps/generated": -556.2427978515625, |
|
"logps/real": -303.4817810058594, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.692432403564453, |
|
"rewards/margins": 16.322385787963867, |
|
"rewards/real": -3.3700478076934814, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4729826231849555e-07, |
|
"logits/generated": -2.097503423690796, |
|
"logits/real": -2.1778554916381836, |
|
"logps/generated": -610.5614013671875, |
|
"logps/real": -399.98358154296875, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.528287887573242, |
|
"rewards/margins": 15.74824047088623, |
|
"rewards/real": -3.780047655105591, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.461080695072602e-07, |
|
"logits/generated": -2.0831446647644043, |
|
"logits/real": -2.2034473419189453, |
|
"logps/generated": -588.8594970703125, |
|
"logps/real": -398.7830810546875, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.589412689208984, |
|
"rewards/margins": 16.229389190673828, |
|
"rewards/real": -1.360022783279419, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.449178766960247e-07, |
|
"logits/generated": -2.1205058097839355, |
|
"logits/real": -2.1790976524353027, |
|
"logps/generated": -588.9290771484375, |
|
"logps/real": -380.9736633300781, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.767770767211914, |
|
"rewards/margins": 14.602663040161133, |
|
"rewards/real": -3.1651082038879395, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4372768388478937e-07, |
|
"logits/generated": -2.1685116291046143, |
|
"logits/real": -2.245673656463623, |
|
"logps/generated": -547.3893432617188, |
|
"logps/real": -357.3170471191406, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.38484001159668, |
|
"rewards/margins": 14.403864860534668, |
|
"rewards/real": -1.9809764623641968, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.425374910735539e-07, |
|
"logits/generated": -2.249411106109619, |
|
"logits/real": -2.279759645462036, |
|
"logps/generated": -618.198486328125, |
|
"logps/real": -434.0210876464844, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.497578620910645, |
|
"rewards/margins": 12.881543159484863, |
|
"rewards/real": -2.616034984588623, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.413472982623185e-07, |
|
"logits/generated": -2.3159961700439453, |
|
"logits/real": -2.294581651687622, |
|
"logps/generated": -537.07861328125, |
|
"logps/real": -337.27874755859375, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.291156768798828, |
|
"rewards/margins": 15.182905197143555, |
|
"rewards/real": -2.1082491874694824, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.401571054510831e-07, |
|
"logits/generated": -2.2418415546417236, |
|
"logits/real": -2.266832113265991, |
|
"logps/generated": -598.0546264648438, |
|
"logps/real": -402.66510009765625, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.92933464050293, |
|
"rewards/margins": 14.629178047180176, |
|
"rewards/real": -2.300158739089966, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.389669126398476e-07, |
|
"logits/generated": -2.200573205947876, |
|
"logits/real": -2.224865436553955, |
|
"logps/generated": -546.8048706054688, |
|
"logps/real": -372.92010498046875, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.779863357543945, |
|
"rewards/margins": 15.325950622558594, |
|
"rewards/real": -3.4539108276367188, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.377767198286122e-07, |
|
"logits/generated": -2.172501802444458, |
|
"logits/real": -2.224087953567505, |
|
"logps/generated": -577.8551025390625, |
|
"logps/real": -396.276123046875, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.138914108276367, |
|
"rewards/margins": 15.50109577178955, |
|
"rewards/real": -3.6378180980682373, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.365865270173768e-07, |
|
"logits/generated": -2.222236394882202, |
|
"logits/real": -2.215315341949463, |
|
"logps/generated": -606.78955078125, |
|
"logps/real": -401.0457763671875, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.88642120361328, |
|
"rewards/margins": 16.262500762939453, |
|
"rewards/real": -2.6239190101623535, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.353963342061414e-07, |
|
"logits/generated": -2.0716452598571777, |
|
"logits/real": -2.118318796157837, |
|
"logps/generated": -649.42431640625, |
|
"logps/real": -411.53338623046875, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.20388412475586, |
|
"rewards/margins": 17.11600112915039, |
|
"rewards/real": -3.0878825187683105, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.3420614139490593e-07, |
|
"logits/generated": -2.0571255683898926, |
|
"logits/real": -2.0999083518981934, |
|
"logps/generated": -555.8407592773438, |
|
"logps/real": -349.90155029296875, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.651325225830078, |
|
"rewards/margins": 15.208650588989258, |
|
"rewards/real": -3.442675828933716, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.3301594858367057e-07, |
|
"logits/generated": -2.0144572257995605, |
|
"logits/real": -2.0585360527038574, |
|
"logps/generated": -564.4594116210938, |
|
"logps/real": -357.2227478027344, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.891742706298828, |
|
"rewards/margins": 17.29900550842285, |
|
"rewards/real": -3.5927371978759766, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.318257557724351e-07, |
|
"logits/generated": -2.096843957901001, |
|
"logits/real": -2.2019436359405518, |
|
"logps/generated": -579.1746826171875, |
|
"logps/real": -376.2278137207031, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.490825653076172, |
|
"rewards/margins": 15.950909614562988, |
|
"rewards/real": -4.539917945861816, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.306355629611997e-07, |
|
"logits/generated": -2.2774910926818848, |
|
"logits/real": -2.312415599822998, |
|
"logps/generated": -589.5444946289062, |
|
"logps/real": -355.139404296875, |
|
"loss": 0.012, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.514019012451172, |
|
"rewards/margins": 14.974294662475586, |
|
"rewards/real": -2.5397236347198486, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.294453701499643e-07, |
|
"logits/generated": -2.2411015033721924, |
|
"logits/real": -2.2287962436676025, |
|
"logps/generated": -519.8386840820312, |
|
"logps/real": -335.5692443847656, |
|
"loss": 0.0235, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.851512908935547, |
|
"rewards/margins": 13.842289924621582, |
|
"rewards/real": -3.0092215538024902, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.282551773387289e-07, |
|
"logits/generated": -2.2905209064483643, |
|
"logits/real": -2.3136816024780273, |
|
"logps/generated": -624.5968017578125, |
|
"logps/real": -403.45501708984375, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.708858489990234, |
|
"rewards/margins": 15.356656074523926, |
|
"rewards/real": -2.352205276489258, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.270649845274934e-07, |
|
"logits/generated": -2.268571376800537, |
|
"logits/real": -2.270794630050659, |
|
"logps/generated": -538.6395263671875, |
|
"logps/real": -346.6217346191406, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.539231300354004, |
|
"rewards/margins": 12.0559720993042, |
|
"rewards/real": -0.4832596778869629, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2587479171625806e-07, |
|
"logits/generated": -2.176980495452881, |
|
"logits/real": -2.2080063819885254, |
|
"logps/generated": -514.9129638671875, |
|
"logps/real": -330.4537048339844, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.298626899719238, |
|
"rewards/margins": 13.659899711608887, |
|
"rewards/real": -1.6387275457382202, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.246845989050226e-07, |
|
"logits/generated": -2.1879403591156006, |
|
"logits/real": -2.1978180408477783, |
|
"logps/generated": -593.1455078125, |
|
"logps/real": -375.1646423339844, |
|
"loss": 0.0175, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.365800857543945, |
|
"rewards/margins": 15.398488998413086, |
|
"rewards/real": -1.967309594154358, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.234944060937872e-07, |
|
"logits/generated": -2.192488193511963, |
|
"logits/real": -2.1515822410583496, |
|
"logps/generated": -523.1942138671875, |
|
"logps/real": -323.0399475097656, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.100679397583008, |
|
"rewards/margins": 14.689547538757324, |
|
"rewards/real": -2.4111287593841553, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2230421328255177e-07, |
|
"logits/generated": -2.1857666969299316, |
|
"logits/real": -2.1973021030426025, |
|
"logps/generated": -573.9412841796875, |
|
"logps/real": -383.3498840332031, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.325065612792969, |
|
"rewards/margins": 13.42688274383545, |
|
"rewards/real": -1.8981819152832031, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.2111402047131636e-07, |
|
"logits/generated": -2.1818251609802246, |
|
"logits/real": -2.138362169265747, |
|
"logps/generated": -582.6113891601562, |
|
"logps/real": -393.2650146484375, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.432369232177734, |
|
"rewards/margins": 14.267339706420898, |
|
"rewards/real": -3.1650280952453613, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.199238276600809e-07, |
|
"logits/generated": -2.1671457290649414, |
|
"logits/real": -2.165565013885498, |
|
"logps/generated": -562.9454345703125, |
|
"logps/real": -394.97747802734375, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.854740142822266, |
|
"rewards/margins": 14.695347785949707, |
|
"rewards/real": -3.159393072128296, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1873363484884554e-07, |
|
"logits/generated": -2.1330654621124268, |
|
"logits/real": -2.1521830558776855, |
|
"logps/generated": -639.1904296875, |
|
"logps/real": -405.31842041015625, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.329586029052734, |
|
"rewards/margins": 17.111438751220703, |
|
"rewards/real": -3.218146800994873, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.175434420376101e-07, |
|
"logits/generated": -2.189150333404541, |
|
"logits/real": -2.256707191467285, |
|
"logps/generated": -564.0371704101562, |
|
"logps/real": -355.9576110839844, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.11702537536621, |
|
"rewards/margins": 15.930302619934082, |
|
"rewards/real": -3.186723232269287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.1635324922637467e-07, |
|
"logits/generated": -2.059345245361328, |
|
"logits/real": -2.0931825637817383, |
|
"logps/generated": -592.4450073242188, |
|
"logps/real": -353.2586975097656, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.638408660888672, |
|
"rewards/margins": 15.883349418640137, |
|
"rewards/real": -3.7550551891326904, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1516305641513926e-07, |
|
"logits/generated": -2.092940092086792, |
|
"logits/real": -2.1705400943756104, |
|
"logps/generated": -628.2099609375, |
|
"logps/real": -390.40728759765625, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.64360237121582, |
|
"rewards/margins": 16.640026092529297, |
|
"rewards/real": -4.003575801849365, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.139728636039038e-07, |
|
"logits/generated": -1.9676166772842407, |
|
"logits/real": -2.0171456336975098, |
|
"logps/generated": -546.7244873046875, |
|
"logps/real": -362.56488037109375, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.221649169921875, |
|
"rewards/margins": 15.068666458129883, |
|
"rewards/real": -4.152983665466309, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.127826707926684e-07, |
|
"logits/generated": -2.1424899101257324, |
|
"logits/real": -2.1512458324432373, |
|
"logps/generated": -566.885986328125, |
|
"logps/real": -319.979736328125, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.882003784179688, |
|
"rewards/margins": 15.76966667175293, |
|
"rewards/real": -3.112335205078125, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.11592477981433e-07, |
|
"logits/generated": -2.140007495880127, |
|
"logits/real": -2.138995409011841, |
|
"logps/generated": -605.3294677734375, |
|
"logps/real": -381.41290283203125, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.86813735961914, |
|
"rewards/margins": 14.63634967803955, |
|
"rewards/real": -4.231788158416748, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.1040228517019756e-07, |
|
"logits/generated": -2.1069555282592773, |
|
"logits/real": -2.0523293018341064, |
|
"logps/generated": -636.1548461914062, |
|
"logps/real": -395.80914306640625, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.208894729614258, |
|
"rewards/margins": 18.558944702148438, |
|
"rewards/real": -2.649949550628662, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.092120923589621e-07, |
|
"logits/generated": -2.1070761680603027, |
|
"logits/real": -2.1584606170654297, |
|
"logps/generated": -615.450439453125, |
|
"logps/real": -398.21844482421875, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.43939208984375, |
|
"rewards/margins": 14.954734802246094, |
|
"rewards/real": -4.484656810760498, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0802189954772674e-07, |
|
"logits/generated": -2.0256807804107666, |
|
"logits/real": -2.057490825653076, |
|
"logps/generated": -540.1599731445312, |
|
"logps/real": -362.2354431152344, |
|
"loss": 0.0277, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.157804489135742, |
|
"rewards/margins": 14.374547004699707, |
|
"rewards/real": -3.7832565307617188, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.068317067364913e-07, |
|
"logits/generated": -2.0899441242218018, |
|
"logits/real": -2.1068785190582275, |
|
"logps/generated": -626.4033813476562, |
|
"logps/real": -453.5624084472656, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.779939651489258, |
|
"rewards/margins": 14.692364692687988, |
|
"rewards/real": -3.0875754356384277, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0564151392525587e-07, |
|
"logits/generated": -1.9991929531097412, |
|
"logits/real": -2.0575671195983887, |
|
"logps/generated": -572.4451904296875, |
|
"logps/real": -387.9151306152344, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.603885650634766, |
|
"rewards/margins": 14.808444023132324, |
|
"rewards/real": -3.7954421043395996, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0445132111402046e-07, |
|
"logits/generated": -1.957606315612793, |
|
"logits/real": -2.0042033195495605, |
|
"logps/generated": -491.47344970703125, |
|
"logps/real": -340.8980712890625, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.086580276489258, |
|
"rewards/margins": 13.114072799682617, |
|
"rewards/real": -2.9725046157836914, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0326112830278505e-07, |
|
"logits/generated": -1.9950910806655884, |
|
"logits/real": -2.022733211517334, |
|
"logps/generated": -572.0457763671875, |
|
"logps/real": -401.0966796875, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.224430084228516, |
|
"rewards/margins": 15.097747802734375, |
|
"rewards/real": -3.126683473587036, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.020709354915496e-07, |
|
"logits/generated": -1.8846557140350342, |
|
"logits/real": -2.030510187149048, |
|
"logps/generated": -571.71337890625, |
|
"logps/real": -432.107421875, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.2412166595459, |
|
"rewards/margins": 14.449191093444824, |
|
"rewards/real": -2.7920241355895996, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.0088074268031423e-07, |
|
"logits/generated": -1.9635789394378662, |
|
"logits/real": -2.010420322418213, |
|
"logps/generated": -595.8865966796875, |
|
"logps/real": -395.47271728515625, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -16.31340217590332, |
|
"rewards/margins": 13.558954238891602, |
|
"rewards/real": -2.754448175430298, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9969054986907876e-07, |
|
"logits/generated": -1.8813692331314087, |
|
"logits/real": -1.973508596420288, |
|
"logps/generated": -504.12823486328125, |
|
"logps/real": -347.0367736816406, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.611045837402344, |
|
"rewards/margins": 14.725499153137207, |
|
"rewards/real": -1.8855485916137695, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9850035705784335e-07, |
|
"logits/generated": -1.9549331665039062, |
|
"logits/real": -2.0346851348876953, |
|
"logps/generated": -609.5227661132812, |
|
"logps/real": -419.77618408203125, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.863967895507812, |
|
"rewards/margins": 14.792566299438477, |
|
"rewards/real": -3.071401357650757, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9731016424660794e-07, |
|
"logits/generated": -1.9871995449066162, |
|
"logits/real": -1.9926321506500244, |
|
"logps/generated": -571.4608764648438, |
|
"logps/real": -346.6838684082031, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.64493751525879, |
|
"rewards/margins": 14.440625190734863, |
|
"rewards/real": -2.2043120861053467, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9611997143537253e-07, |
|
"logits/generated": -1.995072603225708, |
|
"logits/real": -1.9926702976226807, |
|
"logps/generated": -639.6512451171875, |
|
"logps/real": -369.5633239746094, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.795745849609375, |
|
"rewards/margins": 15.904996871948242, |
|
"rewards/real": -2.890749454498291, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9492977862413707e-07, |
|
"logits/generated": -2.0281620025634766, |
|
"logits/real": -1.9819482564926147, |
|
"logps/generated": -570.8590698242188, |
|
"logps/real": -378.61474609375, |
|
"loss": 0.0159, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -17.853015899658203, |
|
"rewards/margins": 13.683568000793457, |
|
"rewards/real": -4.169447898864746, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.937395858129017e-07, |
|
"logits/generated": -1.9663385152816772, |
|
"logits/real": -2.0112898349761963, |
|
"logps/generated": -612.8561401367188, |
|
"logps/real": -383.02532958984375, |
|
"loss": 0.017, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.021343231201172, |
|
"rewards/margins": 16.094024658203125, |
|
"rewards/real": -2.927319288253784, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9254939300166625e-07, |
|
"logits/generated": -1.9601085186004639, |
|
"logits/real": -2.0084640979766846, |
|
"logps/generated": -595.7770385742188, |
|
"logps/real": -402.1640319824219, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.758970260620117, |
|
"rewards/margins": 15.546884536743164, |
|
"rewards/real": -3.2120864391326904, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.913592001904308e-07, |
|
"logits/generated": -1.959242820739746, |
|
"logits/real": -2.011341094970703, |
|
"logps/generated": -582.9662475585938, |
|
"logps/real": -326.1315002441406, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.912372589111328, |
|
"rewards/margins": 15.57469654083252, |
|
"rewards/real": -3.3376784324645996, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9016900737919543e-07, |
|
"logits/generated": -1.8837999105453491, |
|
"logits/real": -2.0170726776123047, |
|
"logps/generated": -615.4156494140625, |
|
"logps/real": -384.8844299316406, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.660049438476562, |
|
"rewards/margins": 15.770927429199219, |
|
"rewards/real": -2.8891234397888184, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8897881456795997e-07, |
|
"logits/generated": -1.8241097927093506, |
|
"logits/real": -1.7869752645492554, |
|
"logps/generated": -593.7149047851562, |
|
"logps/real": -370.52789306640625, |
|
"loss": 0.0204, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.744731903076172, |
|
"rewards/margins": 15.868573188781738, |
|
"rewards/real": -2.8761584758758545, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8778862175672456e-07, |
|
"logits/generated": -1.7767345905303955, |
|
"logits/real": -1.8023865222930908, |
|
"logps/generated": -588.9293823242188, |
|
"logps/real": -392.6181640625, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.669166564941406, |
|
"rewards/margins": 16.322322845458984, |
|
"rewards/real": -2.346843957901001, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8659842894548915e-07, |
|
"logits/generated": -1.7413629293441772, |
|
"logits/real": -1.7947721481323242, |
|
"logps/generated": -632.8668212890625, |
|
"logps/real": -339.634765625, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.01340103149414, |
|
"rewards/margins": 17.00638771057129, |
|
"rewards/real": -4.007015228271484, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8540823613425374e-07, |
|
"logits/generated": -1.7241382598876953, |
|
"logits/real": -1.8351795673370361, |
|
"logps/generated": -553.7213134765625, |
|
"logps/real": -371.4381408691406, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.440507888793945, |
|
"rewards/margins": 13.914960861206055, |
|
"rewards/real": -4.525545597076416, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8421804332301827e-07, |
|
"logits/generated": -1.7572247982025146, |
|
"logits/real": -1.8531087636947632, |
|
"logps/generated": -680.1027221679688, |
|
"logps/real": -414.3067321777344, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.988712310791016, |
|
"rewards/margins": 17.40224266052246, |
|
"rewards/real": -4.586469650268555, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.830278505117829e-07, |
|
"logits/generated": -1.7346735000610352, |
|
"logits/real": -1.8323335647583008, |
|
"logps/generated": -684.58642578125, |
|
"logps/real": -440.241943359375, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.51043128967285, |
|
"rewards/margins": 17.93229103088379, |
|
"rewards/real": -5.57813835144043, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8183765770054745e-07, |
|
"logits/generated": -1.6866531372070312, |
|
"logits/real": -1.7377674579620361, |
|
"logps/generated": -626.2985229492188, |
|
"logps/real": -398.7217102050781, |
|
"loss": 0.0209, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -20.974733352661133, |
|
"rewards/margins": 15.526639938354492, |
|
"rewards/real": -5.448093414306641, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.806474648893121e-07, |
|
"logits/generated": -1.7734663486480713, |
|
"logits/real": -1.852805733680725, |
|
"logps/generated": -599.9769897460938, |
|
"logps/real": -384.3834228515625, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.156208038330078, |
|
"rewards/margins": 17.898412704467773, |
|
"rewards/real": -5.257795810699463, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.7945727207807663e-07, |
|
"logits/generated": -1.704843282699585, |
|
"logits/real": -1.8438117504119873, |
|
"logps/generated": -671.08935546875, |
|
"logps/real": -363.78399658203125, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.627620697021484, |
|
"rewards/margins": 18.943744659423828, |
|
"rewards/real": -4.6838765144348145, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.782670792668412e-07, |
|
"logits/generated": -1.8488250970840454, |
|
"logits/real": -1.937170386314392, |
|
"logps/generated": -617.1664428710938, |
|
"logps/real": -413.4002990722656, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.6809024810791, |
|
"rewards/margins": 16.090904235839844, |
|
"rewards/real": -5.5899977684021, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.770768864556058e-07, |
|
"logits/generated": -1.7172123193740845, |
|
"logits/real": -1.8333232402801514, |
|
"logps/generated": -608.66943359375, |
|
"logps/real": -358.6962890625, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.130813598632812, |
|
"rewards/margins": 17.398597717285156, |
|
"rewards/real": -4.732216835021973, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.758866936443704e-07, |
|
"logits/generated": -1.7257283926010132, |
|
"logits/real": -1.8596134185791016, |
|
"logps/generated": -620.2059936523438, |
|
"logps/real": -376.4665222167969, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.570621490478516, |
|
"rewards/margins": 18.450403213500977, |
|
"rewards/real": -4.120217323303223, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7469650083313494e-07, |
|
"logits/generated": -1.8226665258407593, |
|
"logits/real": -1.9213998317718506, |
|
"logps/generated": -523.0108642578125, |
|
"logps/real": -348.8974609375, |
|
"loss": 0.0208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.249343872070312, |
|
"rewards/margins": 14.304719924926758, |
|
"rewards/real": -3.9446239471435547, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.735063080218996e-07, |
|
"logits/generated": -1.7809902429580688, |
|
"logits/real": -1.861524224281311, |
|
"logps/generated": -596.6590576171875, |
|
"logps/real": -363.1610107421875, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.3269100189209, |
|
"rewards/margins": 17.15434455871582, |
|
"rewards/real": -2.1725666522979736, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.723161152106641e-07, |
|
"logits/generated": -1.6831693649291992, |
|
"logits/real": -1.7770426273345947, |
|
"logps/generated": -556.0060424804688, |
|
"logps/real": -342.6446838378906, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.728736877441406, |
|
"rewards/margins": 15.71430492401123, |
|
"rewards/real": -4.01443338394165, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.711259223994287e-07, |
|
"logits/generated": -1.7193806171417236, |
|
"logits/real": -1.7846571207046509, |
|
"logps/generated": -513.1950073242188, |
|
"logps/real": -321.84466552734375, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.99886131286621, |
|
"rewards/margins": 14.975049018859863, |
|
"rewards/real": -4.023811340332031, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.699357295881933e-07, |
|
"logits/generated": -1.696411371231079, |
|
"logits/real": -1.8342005014419556, |
|
"logps/generated": -525.8536376953125, |
|
"logps/real": -306.38720703125, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.83527374267578, |
|
"rewards/margins": 13.64979076385498, |
|
"rewards/real": -4.185482978820801, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.687455367769579e-07, |
|
"logits/generated": -1.777260184288025, |
|
"logits/real": -1.8473188877105713, |
|
"logps/generated": -579.9354248046875, |
|
"logps/real": -359.2183532714844, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -16.67335319519043, |
|
"rewards/margins": 13.895036697387695, |
|
"rewards/real": -2.77831768989563, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.675553439657224e-07, |
|
"logits/generated": -1.7697776556015015, |
|
"logits/real": -1.8199243545532227, |
|
"logps/generated": -516.1290893554688, |
|
"logps/real": -342.9108581542969, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.577775955200195, |
|
"rewards/margins": 13.427032470703125, |
|
"rewards/real": -2.150745391845703, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.66365151154487e-07, |
|
"logits/generated": -1.768214464187622, |
|
"logits/real": -1.8642857074737549, |
|
"logps/generated": -514.9273681640625, |
|
"logps/real": -360.794677734375, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.164379119873047, |
|
"rewards/margins": 13.772100448608398, |
|
"rewards/real": -2.3922770023345947, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.651749583432516e-07, |
|
"logits/generated": -1.8366386890411377, |
|
"logits/real": -1.8158714771270752, |
|
"logps/generated": -596.3200073242188, |
|
"logps/real": -362.0704650878906, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.17621421813965, |
|
"rewards/margins": 14.673547744750977, |
|
"rewards/real": -2.5026676654815674, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6398476553201614e-07, |
|
"logits/generated": -1.715486764907837, |
|
"logits/real": -1.7608009576797485, |
|
"logps/generated": -592.689208984375, |
|
"logps/real": -394.7555236816406, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.238231658935547, |
|
"rewards/margins": 15.920003890991211, |
|
"rewards/real": -2.318227767944336, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.627945727207808e-07, |
|
"logits/generated": -1.8307558298110962, |
|
"logits/real": -1.8600902557373047, |
|
"logps/generated": -560.46142578125, |
|
"logps/real": -370.09747314453125, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.495569229125977, |
|
"rewards/margins": 14.71654987335205, |
|
"rewards/real": -3.7790215015411377, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.616043799095453e-07, |
|
"logits/generated": -1.7307708263397217, |
|
"logits/real": -1.7592779397964478, |
|
"logps/generated": -584.9305419921875, |
|
"logps/real": -359.0640869140625, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.275348663330078, |
|
"rewards/margins": 14.876652717590332, |
|
"rewards/real": -4.398694038391113, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.604141870983099e-07, |
|
"logits/generated": -1.702275037765503, |
|
"logits/real": -1.7664591073989868, |
|
"logps/generated": -556.9425048828125, |
|
"logps/real": -366.8645935058594, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.035137176513672, |
|
"rewards/margins": 15.076634407043457, |
|
"rewards/real": -3.9585037231445312, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.592239942870745e-07, |
|
"logits/generated": -1.8040691614151, |
|
"logits/real": -1.831241250038147, |
|
"logps/generated": -605.1649780273438, |
|
"logps/real": -383.25506591796875, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.789392471313477, |
|
"rewards/margins": 15.085497856140137, |
|
"rewards/real": -3.7038941383361816, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.580338014758391e-07, |
|
"logits/generated": -1.9082372188568115, |
|
"logits/real": -1.964238166809082, |
|
"logps/generated": -597.7811889648438, |
|
"logps/real": -401.4377746582031, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.443161010742188, |
|
"rewards/margins": 15.85346794128418, |
|
"rewards/real": -2.589694023132324, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.568436086646036e-07, |
|
"logits/generated": -1.999436616897583, |
|
"logits/real": -1.951202154159546, |
|
"logps/generated": -578.76806640625, |
|
"logps/real": -361.1490478515625, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.621252059936523, |
|
"rewards/margins": 13.709065437316895, |
|
"rewards/real": -3.9121880531311035, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5565341585336827e-07, |
|
"logits/generated": -1.8233203887939453, |
|
"logits/real": -1.8996546268463135, |
|
"logps/generated": -575.7025756835938, |
|
"logps/real": -398.1545715332031, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.55044174194336, |
|
"rewards/margins": 15.433720588684082, |
|
"rewards/real": -3.1167218685150146, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.544632230421328e-07, |
|
"logits/generated": -1.8319776058197021, |
|
"logits/real": -1.9016917943954468, |
|
"logps/generated": -609.1539306640625, |
|
"logps/real": -345.43536376953125, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.780593872070312, |
|
"rewards/margins": 17.442068099975586, |
|
"rewards/real": -2.338524341583252, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.532730302308974e-07, |
|
"logits/generated": -1.9279365539550781, |
|
"logits/real": -1.9061311483383179, |
|
"logps/generated": -566.6038818359375, |
|
"logps/real": -330.6295166015625, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.055160522460938, |
|
"rewards/margins": 15.601313591003418, |
|
"rewards/real": -3.4538471698760986, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.52082837419662e-07, |
|
"logits/generated": -1.8530943393707275, |
|
"logits/real": -1.919136643409729, |
|
"logps/generated": -604.7074584960938, |
|
"logps/real": -362.3363342285156, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.198497772216797, |
|
"rewards/margins": 16.335779190063477, |
|
"rewards/real": -2.8627171516418457, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5089264460842657e-07, |
|
"logits/generated": -1.953850507736206, |
|
"logits/real": -1.872309923171997, |
|
"logps/generated": -618.9617309570312, |
|
"logps/real": -378.0514831542969, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.868818283081055, |
|
"rewards/margins": 15.176447868347168, |
|
"rewards/real": -3.692370653152466, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.497024517971911e-07, |
|
"logits/generated": -1.8568464517593384, |
|
"logits/real": -1.887955665588379, |
|
"logps/generated": -605.1824340820312, |
|
"logps/real": -405.60235595703125, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.905994415283203, |
|
"rewards/margins": 15.54051685333252, |
|
"rewards/real": -3.3654770851135254, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.485122589859557e-07, |
|
"logits/generated": -1.8930320739746094, |
|
"logits/real": -1.8917458057403564, |
|
"logps/generated": -584.1777954101562, |
|
"logps/real": -337.369384765625, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -21.138484954833984, |
|
"rewards/margins": 17.352680206298828, |
|
"rewards/real": -3.785806179046631, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.473220661747203e-07, |
|
"logits/generated": -1.8552837371826172, |
|
"logits/real": -1.9193109273910522, |
|
"logps/generated": -608.6238403320312, |
|
"logps/real": -370.24603271484375, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.182647705078125, |
|
"rewards/margins": 16.14703369140625, |
|
"rewards/real": -3.035613536834717, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.461318733634849e-07, |
|
"logits/generated": -1.7766849994659424, |
|
"logits/real": -1.8722776174545288, |
|
"logps/generated": -549.0695190429688, |
|
"logps/real": -349.0986022949219, |
|
"loss": 0.013, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.278690338134766, |
|
"rewards/margins": 16.906200408935547, |
|
"rewards/real": -2.3724896907806396, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4494168055224947e-07, |
|
"logits/generated": -1.9358885288238525, |
|
"logits/real": -1.9020036458969116, |
|
"logps/generated": -619.1986694335938, |
|
"logps/real": -427.21258544921875, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.58693504333496, |
|
"rewards/margins": 15.541351318359375, |
|
"rewards/real": -3.0455851554870605, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4375148774101406e-07, |
|
"logits/generated": -1.799574613571167, |
|
"logits/real": -1.8969202041625977, |
|
"logps/generated": -522.123046875, |
|
"logps/real": -333.136474609375, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.633255004882812, |
|
"rewards/margins": 15.599235534667969, |
|
"rewards/real": -3.0340187549591064, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.425612949297786e-07, |
|
"logits/generated": -1.8291136026382446, |
|
"logits/real": -1.8533122539520264, |
|
"logps/generated": -593.2408447265625, |
|
"logps/real": -402.7211608886719, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.01093101501465, |
|
"rewards/margins": 15.119882583618164, |
|
"rewards/real": -3.8910484313964844, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.413711021185432e-07, |
|
"logits/generated": -1.8732038736343384, |
|
"logits/real": -1.8730173110961914, |
|
"logps/generated": -600.4080810546875, |
|
"logps/real": -323.9451904296875, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.80618667602539, |
|
"rewards/margins": 17.078380584716797, |
|
"rewards/real": -3.7278037071228027, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.401809093073078e-07, |
|
"logits/generated": -1.8166701793670654, |
|
"logits/real": -1.8924024105072021, |
|
"logps/generated": -576.9189453125, |
|
"logps/real": -372.4144592285156, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.3643798828125, |
|
"rewards/margins": 15.9496488571167, |
|
"rewards/real": -3.4147305488586426, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3899071649607236e-07, |
|
"logits/generated": -1.8222957849502563, |
|
"logits/real": -1.7936245203018188, |
|
"logps/generated": -639.0947265625, |
|
"logps/real": -437.8866271972656, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.86641502380371, |
|
"rewards/margins": 15.062356948852539, |
|
"rewards/real": -3.8040592670440674, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3780052368483693e-07, |
|
"logits/generated": -1.6525121927261353, |
|
"logits/real": -1.7323639392852783, |
|
"logps/generated": -528.6134033203125, |
|
"logps/real": -337.75048828125, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.23191261291504, |
|
"rewards/margins": 15.418153762817383, |
|
"rewards/real": -4.813759803771973, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3661033087360152e-07, |
|
"logits/generated": -1.8742326498031616, |
|
"logits/real": -1.941476583480835, |
|
"logps/generated": -597.1447143554688, |
|
"logps/real": -358.0196838378906, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.783157348632812, |
|
"rewards/margins": 15.727224349975586, |
|
"rewards/real": -3.055934190750122, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.354201380623661e-07, |
|
"logits/generated": -1.8624379634857178, |
|
"logits/real": -1.893402099609375, |
|
"logps/generated": -629.6782836914062, |
|
"logps/real": -406.6865234375, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.850439071655273, |
|
"rewards/margins": 16.49611473083496, |
|
"rewards/real": -4.35432243347168, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.342299452511307e-07, |
|
"logits/generated": -1.8548250198364258, |
|
"logits/real": -1.8583017587661743, |
|
"logps/generated": -582.7992553710938, |
|
"logps/real": -323.04205322265625, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.348865509033203, |
|
"rewards/margins": 18.243560791015625, |
|
"rewards/real": -4.105301856994629, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3303975243989526e-07, |
|
"logits/generated": -1.9024966955184937, |
|
"logits/real": -2.017508029937744, |
|
"logps/generated": -574.1644287109375, |
|
"logps/real": -349.75714111328125, |
|
"loss": 0.03, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.862314224243164, |
|
"rewards/margins": 17.064558029174805, |
|
"rewards/real": -3.7977538108825684, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3184955962865982e-07, |
|
"logits/generated": -1.9994853734970093, |
|
"logits/real": -2.0445475578308105, |
|
"logps/generated": -562.1621704101562, |
|
"logps/real": -349.68695068359375, |
|
"loss": 0.0202, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.807878494262695, |
|
"rewards/margins": 17.700210571289062, |
|
"rewards/real": -4.107669353485107, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.306593668174244e-07, |
|
"logits/generated": -1.9293378591537476, |
|
"logits/real": -2.0241332054138184, |
|
"logps/generated": -584.3340454101562, |
|
"logps/real": -374.9560546875, |
|
"loss": 0.0256, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -19.921669006347656, |
|
"rewards/margins": 16.468935012817383, |
|
"rewards/real": -3.452733278274536, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2946917400618898e-07, |
|
"logits/generated": -1.894095778465271, |
|
"logits/real": -1.973131775856018, |
|
"logps/generated": -621.0687866210938, |
|
"logps/real": -442.6654357910156, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.7646427154541, |
|
"rewards/margins": 17.236825942993164, |
|
"rewards/real": -4.527815818786621, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2827898119495357e-07, |
|
"logits/generated": -1.908696174621582, |
|
"logits/real": -1.8996613025665283, |
|
"logps/generated": -641.0946044921875, |
|
"logps/real": -396.1679992675781, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.93410873413086, |
|
"rewards/margins": 18.131559371948242, |
|
"rewards/real": -4.802548885345459, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2708878838371816e-07, |
|
"logits/generated": -1.887770414352417, |
|
"logits/real": -1.9259475469589233, |
|
"logps/generated": -641.0634765625, |
|
"logps/real": -353.3055419921875, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.32883071899414, |
|
"rewards/margins": 18.148107528686523, |
|
"rewards/real": -4.18071985244751, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2589859557248272e-07, |
|
"logits/generated": -1.931406021118164, |
|
"logits/real": -1.9512712955474854, |
|
"logps/generated": -613.9124755859375, |
|
"logps/real": -410.6527404785156, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.747358322143555, |
|
"rewards/margins": 16.431184768676758, |
|
"rewards/real": -5.31617546081543, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.247084027612473e-07, |
|
"logits/generated": -1.9454374313354492, |
|
"logits/real": -1.9846471548080444, |
|
"logps/generated": -613.2532348632812, |
|
"logps/real": -419.8189392089844, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.502111434936523, |
|
"rewards/margins": 19.032825469970703, |
|
"rewards/real": -3.4692866802215576, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.235182099500119e-07, |
|
"logits/generated": -1.943305253982544, |
|
"logits/real": -1.9613683223724365, |
|
"logps/generated": -590.8385620117188, |
|
"logps/real": -376.8114013671875, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.899354934692383, |
|
"rewards/margins": 15.790725708007812, |
|
"rewards/real": -3.108628273010254, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2232801713877646e-07, |
|
"logits/generated": -1.969151258468628, |
|
"logits/real": -2.0842716693878174, |
|
"logps/generated": -567.1388549804688, |
|
"logps/real": -386.68927001953125, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.092025756835938, |
|
"rewards/margins": 14.683004379272461, |
|
"rewards/real": -4.409019947052002, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2113782432754105e-07, |
|
"logits/generated": -2.0907585620880127, |
|
"logits/real": -2.102865219116211, |
|
"logps/generated": -599.5132446289062, |
|
"logps/real": -384.66351318359375, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -20.033435821533203, |
|
"rewards/margins": 15.8507080078125, |
|
"rewards/real": -4.182726860046387, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.1994763151630564e-07, |
|
"logits/generated": -2.1723508834838867, |
|
"logits/real": -2.1705594062805176, |
|
"logps/generated": -512.1658935546875, |
|
"logps/real": -349.0197448730469, |
|
"loss": 0.014, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -16.58482551574707, |
|
"rewards/margins": 14.868623733520508, |
|
"rewards/real": -1.716202735900879, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.187574387050702e-07, |
|
"logits/generated": -2.1666464805603027, |
|
"logits/real": -2.1792376041412354, |
|
"logps/generated": -632.5197143554688, |
|
"logps/real": -366.4725646972656, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.97044563293457, |
|
"rewards/margins": 16.773412704467773, |
|
"rewards/real": -2.1970319747924805, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.175672458938348e-07, |
|
"logits/generated": -2.1748318672180176, |
|
"logits/real": -2.1950387954711914, |
|
"logps/generated": -549.257568359375, |
|
"logps/real": -374.5805969238281, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.554750442504883, |
|
"rewards/margins": 15.931724548339844, |
|
"rewards/real": -2.6230263710021973, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1637705308259938e-07, |
|
"logits/generated": -2.2087433338165283, |
|
"logits/real": -2.2343385219573975, |
|
"logps/generated": -569.7178344726562, |
|
"logps/real": -363.35565185546875, |
|
"loss": 0.02, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -18.506458282470703, |
|
"rewards/margins": 15.537744522094727, |
|
"rewards/real": -2.968712568283081, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1518686027136395e-07, |
|
"logits/generated": -2.141085624694824, |
|
"logits/real": -2.2068240642547607, |
|
"logps/generated": -559.6201171875, |
|
"logps/real": -355.288818359375, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.508007049560547, |
|
"rewards/margins": 13.753092765808105, |
|
"rewards/real": -3.754911422729492, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1399666746012854e-07, |
|
"logits/generated": -2.1633191108703613, |
|
"logits/real": -2.1534600257873535, |
|
"logps/generated": -557.6275024414062, |
|
"logps/real": -355.95733642578125, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.064023971557617, |
|
"rewards/margins": 13.931376457214355, |
|
"rewards/real": -4.132648468017578, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1280647464889313e-07, |
|
"logits/generated": -2.146686315536499, |
|
"logits/real": -2.154139280319214, |
|
"logps/generated": -617.3681030273438, |
|
"logps/real": -377.43853759765625, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.22401237487793, |
|
"rewards/margins": 16.329097747802734, |
|
"rewards/real": -3.8949122428894043, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.116162818376577e-07, |
|
"logits/generated": -2.074739456176758, |
|
"logits/real": -2.1095595359802246, |
|
"logps/generated": -574.0628051757812, |
|
"logps/real": -360.5137023925781, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.636310577392578, |
|
"rewards/margins": 17.622827529907227, |
|
"rewards/real": -3.013484477996826, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1042608902642228e-07, |
|
"logits/generated": -2.1206960678100586, |
|
"logits/real": -2.20168399810791, |
|
"logps/generated": -583.8810424804688, |
|
"logps/real": -382.8518371582031, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.312095642089844, |
|
"rewards/margins": 15.806414604187012, |
|
"rewards/real": -4.505680561065674, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0923589621518687e-07, |
|
"logits/generated": -2.0229172706604004, |
|
"logits/real": -2.0557923316955566, |
|
"logps/generated": -597.6458740234375, |
|
"logps/real": -395.81646728515625, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.894927978515625, |
|
"rewards/margins": 17.441539764404297, |
|
"rewards/real": -3.453387498855591, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.080457034039514e-07, |
|
"logits/generated": -1.9580085277557373, |
|
"logits/real": -2.1535487174987793, |
|
"logps/generated": -578.9662475585938, |
|
"logps/real": -369.46197509765625, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.469118118286133, |
|
"rewards/margins": 14.781881332397461, |
|
"rewards/real": -3.687236785888672, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.06855510592716e-07, |
|
"logits/generated": -2.0475711822509766, |
|
"logits/real": -2.0905933380126953, |
|
"logps/generated": -579.7207641601562, |
|
"logps/real": -362.5934143066406, |
|
"loss": 0.0442, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.495128631591797, |
|
"rewards/margins": 16.245441436767578, |
|
"rewards/real": -3.249690294265747, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0566531778148059e-07, |
|
"logits/generated": -1.9959481954574585, |
|
"logits/real": -2.1353626251220703, |
|
"logps/generated": -564.4955444335938, |
|
"logps/real": -383.1506042480469, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.533605575561523, |
|
"rewards/margins": 16.09157943725586, |
|
"rewards/real": -2.4420278072357178, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0447512497024515e-07, |
|
"logits/generated": -2.084301233291626, |
|
"logits/real": -2.0366158485412598, |
|
"logps/generated": -518.3314208984375, |
|
"logps/real": -350.7093505859375, |
|
"loss": 0.0225, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.012195587158203, |
|
"rewards/margins": 15.021069526672363, |
|
"rewards/real": -2.9911255836486816, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0328493215900974e-07, |
|
"logits/generated": -2.1363632678985596, |
|
"logits/real": -2.122278928756714, |
|
"logps/generated": -602.5611572265625, |
|
"logps/real": -397.9182434082031, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.253149032592773, |
|
"rewards/margins": 17.09138298034668, |
|
"rewards/real": -3.1617660522460938, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0209473934777433e-07, |
|
"logits/generated": -2.134699821472168, |
|
"logits/real": -2.08998441696167, |
|
"logps/generated": -580.0136108398438, |
|
"logps/real": -359.6705322265625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.094505310058594, |
|
"rewards/margins": 15.466270446777344, |
|
"rewards/real": -3.628235340118408, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0090454653653892e-07, |
|
"logits/generated": -2.042788028717041, |
|
"logits/real": -2.0056633949279785, |
|
"logps/generated": -532.9696044921875, |
|
"logps/real": -333.90618896484375, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.65140151977539, |
|
"rewards/margins": 15.834383010864258, |
|
"rewards/real": -2.8170199394226074, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9971435372530348e-07, |
|
"logits/generated": -2.0326991081237793, |
|
"logits/real": -2.0445380210876465, |
|
"logps/generated": -636.9520263671875, |
|
"logps/real": -420.206787109375, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.007139205932617, |
|
"rewards/margins": 16.871540069580078, |
|
"rewards/real": -3.1356008052825928, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9852416091406807e-07, |
|
"logits/generated": -2.16467022895813, |
|
"logits/real": -2.151834011077881, |
|
"logps/generated": -608.9406127929688, |
|
"logps/real": -386.32403564453125, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.408123016357422, |
|
"rewards/margins": 15.358880996704102, |
|
"rewards/real": -4.049244403839111, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9733396810283266e-07, |
|
"logits/generated": -1.994359016418457, |
|
"logits/real": -2.083233594894409, |
|
"logps/generated": -580.0289916992188, |
|
"logps/real": -387.74285888671875, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.273834228515625, |
|
"rewards/margins": 16.140888214111328, |
|
"rewards/real": -3.1329457759857178, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9614377529159722e-07, |
|
"logits/generated": -1.9821665287017822, |
|
"logits/real": -1.9590924978256226, |
|
"logps/generated": -658.1160278320312, |
|
"logps/real": -387.3288879394531, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.883031845092773, |
|
"rewards/margins": 18.182018280029297, |
|
"rewards/real": -3.7010135650634766, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9495358248036181e-07, |
|
"logits/generated": -1.8935340642929077, |
|
"logits/real": -1.9233297109603882, |
|
"logps/generated": -587.504638671875, |
|
"logps/real": -379.6001281738281, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.569808959960938, |
|
"rewards/margins": 16.9621639251709, |
|
"rewards/real": -4.607644081115723, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.937633896691264e-07, |
|
"logits/generated": -2.0384740829467773, |
|
"logits/real": -2.0107827186584473, |
|
"logps/generated": -560.0950927734375, |
|
"logps/real": -348.64178466796875, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.69436264038086, |
|
"rewards/margins": 16.582056045532227, |
|
"rewards/real": -4.112307548522949, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9257319685789097e-07, |
|
"logits/generated": -1.8694576025009155, |
|
"logits/real": -1.8919403553009033, |
|
"logps/generated": -610.2615966796875, |
|
"logps/real": -373.0062561035156, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.5854434967041, |
|
"rewards/margins": 17.94769287109375, |
|
"rewards/real": -3.637749433517456, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9138300404665556e-07, |
|
"logits/generated": -1.833876371383667, |
|
"logits/real": -1.923029899597168, |
|
"logps/generated": -625.2069091796875, |
|
"logps/real": -445.4774475097656, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.171947479248047, |
|
"rewards/margins": 16.788434982299805, |
|
"rewards/real": -4.383509635925293, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9019281123542015e-07, |
|
"logits/generated": -1.8446261882781982, |
|
"logits/real": -1.9809293746948242, |
|
"logps/generated": -639.4974365234375, |
|
"logps/real": -433.82843017578125, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.332645416259766, |
|
"rewards/margins": 17.92720603942871, |
|
"rewards/real": -4.405437469482422, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.890026184241847e-07, |
|
"logits/generated": -1.794547438621521, |
|
"logits/real": -1.902682900428772, |
|
"logps/generated": -686.3941650390625, |
|
"logps/real": -412.7117614746094, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.83676528930664, |
|
"rewards/margins": 18.992544174194336, |
|
"rewards/real": -4.844220161437988, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.878124256129493e-07, |
|
"logits/generated": -1.771178960800171, |
|
"logits/real": -1.8597948551177979, |
|
"logps/generated": -650.177490234375, |
|
"logps/real": -385.32281494140625, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.727323532104492, |
|
"rewards/margins": 18.738880157470703, |
|
"rewards/real": -3.9884445667266846, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.866222328017139e-07, |
|
"logits/generated": -1.7059656381607056, |
|
"logits/real": -1.7673842906951904, |
|
"logps/generated": -622.1861572265625, |
|
"logps/real": -364.33526611328125, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.290287017822266, |
|
"rewards/margins": 17.137935638427734, |
|
"rewards/real": -5.152352809906006, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8543203999047845e-07, |
|
"logits/generated": -1.7726774215698242, |
|
"logits/real": -1.9006750583648682, |
|
"logps/generated": -613.3416748046875, |
|
"logps/real": -358.4165954589844, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.165300369262695, |
|
"rewards/margins": 17.081605911254883, |
|
"rewards/real": -5.08369255065918, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8424184717924304e-07, |
|
"logits/generated": -1.7835719585418701, |
|
"logits/real": -1.8426719903945923, |
|
"logps/generated": -601.4617919921875, |
|
"logps/real": -340.8309631347656, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.10822868347168, |
|
"rewards/margins": 17.62343978881836, |
|
"rewards/real": -4.48478889465332, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.830516543680076e-07, |
|
"logits/generated": -1.864492416381836, |
|
"logits/real": -1.931165099143982, |
|
"logps/generated": -646.0354614257812, |
|
"logps/real": -399.3186340332031, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.49277114868164, |
|
"rewards/margins": 17.703113555908203, |
|
"rewards/real": -4.789654731750488, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8186146155677217e-07, |
|
"logits/generated": -1.6619055271148682, |
|
"logits/real": -1.682189702987671, |
|
"logps/generated": -651.1900634765625, |
|
"logps/real": -430.13775634765625, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.825260162353516, |
|
"rewards/margins": 19.324203491210938, |
|
"rewards/real": -4.501051902770996, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8067126874553676e-07, |
|
"logits/generated": -1.8066043853759766, |
|
"logits/real": -1.857184648513794, |
|
"logps/generated": -644.6920166015625, |
|
"logps/real": -422.68731689453125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.160465240478516, |
|
"rewards/margins": 18.2052001953125, |
|
"rewards/real": -4.955265045166016, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7948107593430135e-07, |
|
"logits/generated": -1.7574580907821655, |
|
"logits/real": -1.6988914012908936, |
|
"logps/generated": -635.5706176757812, |
|
"logps/real": -414.3448791503906, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.139110565185547, |
|
"rewards/margins": 17.725711822509766, |
|
"rewards/real": -4.41339635848999, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.782908831230659e-07, |
|
"logits/generated": -1.7514142990112305, |
|
"logits/real": -1.833495855331421, |
|
"logps/generated": -635.5641479492188, |
|
"logps/real": -391.35406494140625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.829418182373047, |
|
"rewards/margins": 19.86545753479004, |
|
"rewards/real": -4.963961601257324, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.771006903118305e-07, |
|
"logits/generated": -1.8058671951293945, |
|
"logits/real": -1.84176504611969, |
|
"logps/generated": -596.8150634765625, |
|
"logps/real": -392.7447814941406, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.030054092407227, |
|
"rewards/margins": 17.652753829956055, |
|
"rewards/real": -5.37730073928833, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.759104975005951e-07, |
|
"logits/generated": -1.7943785190582275, |
|
"logits/real": -1.8919038772583008, |
|
"logps/generated": -616.5109252929688, |
|
"logps/real": -351.06903076171875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.05794334411621, |
|
"rewards/margins": 18.479902267456055, |
|
"rewards/real": -5.578042984008789, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7472030468935965e-07, |
|
"logits/generated": -1.7733243703842163, |
|
"logits/real": -1.8298814296722412, |
|
"logps/generated": -644.5895385742188, |
|
"logps/real": -346.65716552734375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.519426345825195, |
|
"rewards/margins": 19.296615600585938, |
|
"rewards/real": -4.222809791564941, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7353011187812424e-07, |
|
"logits/generated": -1.7913854122161865, |
|
"logits/real": -1.7636489868164062, |
|
"logps/generated": -630.2733154296875, |
|
"logps/real": -346.322021484375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.583568572998047, |
|
"rewards/margins": 19.213102340698242, |
|
"rewards/real": -5.3704681396484375, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7233991906688883e-07, |
|
"logits/generated": -1.693394422531128, |
|
"logits/real": -1.777193307876587, |
|
"logps/generated": -647.3377685546875, |
|
"logps/real": -437.0379333496094, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.571914672851562, |
|
"rewards/margins": 18.28937530517578, |
|
"rewards/real": -6.282541751861572, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7114972625565342e-07, |
|
"logits/generated": -1.7938833236694336, |
|
"logits/real": -1.8682388067245483, |
|
"logps/generated": -612.8692016601562, |
|
"logps/real": -373.411865234375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.837581634521484, |
|
"rewards/margins": 18.990482330322266, |
|
"rewards/real": -4.847100734710693, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6995953344441799e-07, |
|
"logits/generated": -1.5872770547866821, |
|
"logits/real": -1.749686598777771, |
|
"logps/generated": -612.15576171875, |
|
"logps/real": -354.6670837402344, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.107717514038086, |
|
"rewards/margins": 19.196178436279297, |
|
"rewards/real": -4.911539554595947, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6876934063318258e-07, |
|
"logits/generated": -1.744523048400879, |
|
"logits/real": -1.7055385112762451, |
|
"logps/generated": -612.4163818359375, |
|
"logps/real": -340.0469055175781, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.122411727905273, |
|
"rewards/margins": 20.10819435119629, |
|
"rewards/real": -6.014217376708984, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6757914782194717e-07, |
|
"logits/generated": -1.761125922203064, |
|
"logits/real": -1.713313102722168, |
|
"logps/generated": -677.6751708984375, |
|
"logps/real": -460.227294921875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.481826782226562, |
|
"rewards/margins": 20.24111557006836, |
|
"rewards/real": -4.240714073181152, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6638895501071173e-07, |
|
"logits/generated": -1.7685257196426392, |
|
"logits/real": -1.7595218420028687, |
|
"logps/generated": -670.4361572265625, |
|
"logps/real": -387.3614807128906, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.423667907714844, |
|
"rewards/margins": 20.145977020263672, |
|
"rewards/real": -5.277690887451172, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6519876219947632e-07, |
|
"logits/generated": -1.7515497207641602, |
|
"logits/real": -1.8205455541610718, |
|
"logps/generated": -609.548828125, |
|
"logps/real": -398.30230712890625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.230987548828125, |
|
"rewards/margins": 19.289710998535156, |
|
"rewards/real": -4.9412760734558105, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.640085693882409e-07, |
|
"logits/generated": -1.7568248510360718, |
|
"logits/real": -1.7875878810882568, |
|
"logps/generated": -670.6278076171875, |
|
"logps/real": -403.04669189453125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.942584991455078, |
|
"rewards/margins": 19.860820770263672, |
|
"rewards/real": -6.081762790679932, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6281837657700547e-07, |
|
"logits/generated": -1.672739028930664, |
|
"logits/real": -1.710694670677185, |
|
"logps/generated": -709.2623291015625, |
|
"logps/real": -427.17083740234375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.278446197509766, |
|
"rewards/margins": 20.85129737854004, |
|
"rewards/real": -6.42714786529541, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6162818376577006e-07, |
|
"logits/generated": -1.7365968227386475, |
|
"logits/real": -1.8601267337799072, |
|
"logps/generated": -647.3109130859375, |
|
"logps/real": -408.20904541015625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.648662567138672, |
|
"rewards/margins": 18.514421463012695, |
|
"rewards/real": -6.134242057800293, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6043799095453465e-07, |
|
"logits/generated": -1.7646595239639282, |
|
"logits/real": -1.7028201818466187, |
|
"logps/generated": -615.2380981445312, |
|
"logps/real": -392.93438720703125, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.260175704956055, |
|
"rewards/margins": 17.600215911865234, |
|
"rewards/real": -5.659959316253662, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.592477981432992e-07, |
|
"logits/generated": -1.677546501159668, |
|
"logits/real": -1.7684322595596313, |
|
"logps/generated": -675.1948852539062, |
|
"logps/real": -372.6117248535156, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.304229736328125, |
|
"rewards/margins": 22.81051254272461, |
|
"rewards/real": -5.493716239929199, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5805760533206378e-07, |
|
"logits/generated": -1.6419671773910522, |
|
"logits/real": -1.754093885421753, |
|
"logps/generated": -710.6311645507812, |
|
"logps/real": -412.677001953125, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.5703125, |
|
"rewards/margins": 20.43376922607422, |
|
"rewards/real": -6.136545658111572, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5686741252082837e-07, |
|
"logits/generated": -1.6100307703018188, |
|
"logits/real": -1.7829450368881226, |
|
"logps/generated": -626.542236328125, |
|
"logps/real": -390.32904052734375, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.756441116333008, |
|
"rewards/margins": 18.904939651489258, |
|
"rewards/real": -3.85149884223938, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5567721970959293e-07, |
|
"logits/generated": -1.7627454996109009, |
|
"logits/real": -1.9145416021347046, |
|
"logps/generated": -677.59765625, |
|
"logps/real": -411.33648681640625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.147655487060547, |
|
"rewards/margins": 19.63693618774414, |
|
"rewards/real": -4.51071834564209, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5448702689835752e-07, |
|
"logits/generated": -1.6949329376220703, |
|
"logits/real": -1.7547237873077393, |
|
"logps/generated": -625.1910400390625, |
|
"logps/real": -367.7977600097656, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.851118087768555, |
|
"rewards/margins": 19.71878433227539, |
|
"rewards/real": -4.132336616516113, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.532968340871221e-07, |
|
"logits/generated": -1.744741439819336, |
|
"logits/real": -1.664894700050354, |
|
"logps/generated": -640.474853515625, |
|
"logps/real": -399.55108642578125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.19083023071289, |
|
"rewards/margins": 18.417896270751953, |
|
"rewards/real": -4.772933006286621, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5210664127588667e-07, |
|
"logits/generated": -1.662096381187439, |
|
"logits/real": -1.8298946619033813, |
|
"logps/generated": -631.4981689453125, |
|
"logps/real": -397.19586181640625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.526195526123047, |
|
"rewards/margins": 19.6595516204834, |
|
"rewards/real": -3.866642475128174, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5091644846465126e-07, |
|
"logits/generated": -1.7289931774139404, |
|
"logits/real": -1.8209636211395264, |
|
"logps/generated": -650.0264892578125, |
|
"logps/real": -391.0191345214844, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.553695678710938, |
|
"rewards/margins": 18.257787704467773, |
|
"rewards/real": -4.295907974243164, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.4972625565341585e-07, |
|
"logits/generated": -1.563467025756836, |
|
"logits/real": -1.7469037771224976, |
|
"logps/generated": -654.3523559570312, |
|
"logps/real": -378.0865783691406, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.712825775146484, |
|
"rewards/margins": 20.601755142211914, |
|
"rewards/real": -5.1110734939575195, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4853606284218042e-07, |
|
"logits/generated": -1.7174959182739258, |
|
"logits/real": -1.8672186136245728, |
|
"logps/generated": -646.060791015625, |
|
"logps/real": -402.63372802734375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.062244415283203, |
|
"rewards/margins": 20.159557342529297, |
|
"rewards/real": -5.902686595916748, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.47345870030945e-07, |
|
"logits/generated": -1.7195484638214111, |
|
"logits/real": -1.7659461498260498, |
|
"logps/generated": -670.35693359375, |
|
"logps/real": -430.11724853515625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.210329055786133, |
|
"rewards/margins": 19.280933380126953, |
|
"rewards/real": -5.92939567565918, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.461556772197096e-07, |
|
"logits/generated": -1.6868633031845093, |
|
"logits/real": -1.7503039836883545, |
|
"logps/generated": -666.7827758789062, |
|
"logps/real": -404.4327087402344, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.964031219482422, |
|
"rewards/margins": 18.93131446838379, |
|
"rewards/real": -6.03271484375, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4496548440847416e-07, |
|
"logits/generated": -1.5785818099975586, |
|
"logits/real": -1.695892572402954, |
|
"logps/generated": -671.3966674804688, |
|
"logps/real": -425.050537109375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.694400787353516, |
|
"rewards/margins": 22.770051956176758, |
|
"rewards/real": -5.924350261688232, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4377529159723875e-07, |
|
"logits/generated": -1.7265466451644897, |
|
"logits/real": -1.7282949686050415, |
|
"logps/generated": -644.9312744140625, |
|
"logps/real": -385.41571044921875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.46233558654785, |
|
"rewards/margins": 20.13895034790039, |
|
"rewards/real": -5.323385238647461, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4258509878600334e-07, |
|
"logits/generated": -1.606693983078003, |
|
"logits/real": -1.7219680547714233, |
|
"logps/generated": -651.696533203125, |
|
"logps/real": -405.09185791015625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.30702781677246, |
|
"rewards/margins": 19.77613067626953, |
|
"rewards/real": -5.53089714050293, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4139490597476793e-07, |
|
"logits/generated": -1.691404104232788, |
|
"logits/real": -1.7261533737182617, |
|
"logps/generated": -705.8663330078125, |
|
"logps/real": -424.4842224121094, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.195613861083984, |
|
"rewards/margins": 22.152070999145508, |
|
"rewards/real": -6.043543815612793, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.402047131635325e-07, |
|
"logits/generated": -1.5611227750778198, |
|
"logits/real": -1.7058401107788086, |
|
"logps/generated": -670.8802490234375, |
|
"logps/real": -413.3358459472656, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.489639282226562, |
|
"rewards/margins": 20.785070419311523, |
|
"rewards/real": -5.704569339752197, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3901452035229708e-07, |
|
"logits/generated": -1.512269377708435, |
|
"logits/real": -1.68305242061615, |
|
"logps/generated": -571.3214111328125, |
|
"logps/real": -344.5645751953125, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.5626277923584, |
|
"rewards/margins": 18.92730140686035, |
|
"rewards/real": -5.6353278160095215, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3782432754106167e-07, |
|
"logits/generated": -1.530753493309021, |
|
"logits/real": -1.6607004404067993, |
|
"logps/generated": -622.2992553710938, |
|
"logps/real": -382.42303466796875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.785675048828125, |
|
"rewards/margins": 19.25429344177246, |
|
"rewards/real": -6.531381130218506, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3663413472982623e-07, |
|
"logits/generated": -1.732834815979004, |
|
"logits/real": -1.736498236656189, |
|
"logps/generated": -631.5408935546875, |
|
"logps/real": -347.4884033203125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.657485961914062, |
|
"rewards/margins": 20.199687957763672, |
|
"rewards/real": -6.457800388336182, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.354439419185908e-07, |
|
"logits/generated": -1.5467314720153809, |
|
"logits/real": -1.6046216487884521, |
|
"logps/generated": -634.9423828125, |
|
"logps/real": -400.60052490234375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.31796646118164, |
|
"rewards/margins": 20.177886962890625, |
|
"rewards/real": -7.140076637268066, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3425374910735539e-07, |
|
"logits/generated": -1.599615216255188, |
|
"logits/real": -1.7260444164276123, |
|
"logps/generated": -686.1055908203125, |
|
"logps/real": -475.2167053222656, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.89980697631836, |
|
"rewards/margins": 20.732288360595703, |
|
"rewards/real": -6.167518615722656, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3306355629611995e-07, |
|
"logits/generated": -1.519061803817749, |
|
"logits/real": -1.6003974676132202, |
|
"logps/generated": -736.18408203125, |
|
"logps/real": -413.788330078125, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.17459487915039, |
|
"rewards/margins": 22.599679946899414, |
|
"rewards/real": -6.574913024902344, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3187336348488454e-07, |
|
"logits/generated": -1.5416706800460815, |
|
"logits/real": -1.5915443897247314, |
|
"logps/generated": -693.075927734375, |
|
"logps/real": -461.91339111328125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.290576934814453, |
|
"rewards/margins": 20.055639266967773, |
|
"rewards/real": -6.234936237335205, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3068317067364913e-07, |
|
"logits/generated": -1.521410584449768, |
|
"logits/real": -1.6385908126831055, |
|
"logps/generated": -643.1195068359375, |
|
"logps/real": -369.3460998535156, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.421768188476562, |
|
"rewards/margins": 20.350460052490234, |
|
"rewards/real": -6.071305274963379, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.294929778624137e-07, |
|
"logits/generated": -1.5358374118804932, |
|
"logits/real": -1.6597099304199219, |
|
"logps/generated": -689.6113891601562, |
|
"logps/real": -466.4942321777344, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.892501831054688, |
|
"rewards/margins": 19.115379333496094, |
|
"rewards/real": -5.777121543884277, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2830278505117828e-07, |
|
"logits/generated": -1.406715989112854, |
|
"logits/real": -1.4844402074813843, |
|
"logps/generated": -658.787353515625, |
|
"logps/real": -436.83636474609375, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.258020401000977, |
|
"rewards/margins": 21.475244522094727, |
|
"rewards/real": -6.782778739929199, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2711259223994287e-07, |
|
"logits/generated": -1.3396766185760498, |
|
"logits/real": -1.4809544086456299, |
|
"logps/generated": -681.5261840820312, |
|
"logps/real": -404.4817199707031, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.8902645111084, |
|
"rewards/margins": 21.514251708984375, |
|
"rewards/real": -6.376011848449707, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2592239942870743e-07, |
|
"logits/generated": -1.3948055505752563, |
|
"logits/real": -1.5513432025909424, |
|
"logps/generated": -698.7779541015625, |
|
"logps/real": -433.32244873046875, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.659128189086914, |
|
"rewards/margins": 20.854068756103516, |
|
"rewards/real": -6.805060386657715, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2473220661747202e-07, |
|
"logits/generated": -1.5047938823699951, |
|
"logits/real": -1.5021181106567383, |
|
"logps/generated": -733.7626342773438, |
|
"logps/real": -459.6304626464844, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.57221031188965, |
|
"rewards/margins": 21.513500213623047, |
|
"rewards/real": -9.058713912963867, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2354201380623661e-07, |
|
"logits/generated": -1.440640926361084, |
|
"logits/real": -1.4846980571746826, |
|
"logps/generated": -692.8338623046875, |
|
"logps/real": -438.29180908203125, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.915645599365234, |
|
"rewards/margins": 19.977619171142578, |
|
"rewards/real": -8.93802547454834, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2235182099500118e-07, |
|
"logits/generated": -1.4438087940216064, |
|
"logits/real": -1.5675140619277954, |
|
"logps/generated": -683.9814453125, |
|
"logps/real": -481.3240661621094, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.231365203857422, |
|
"rewards/margins": 20.71581268310547, |
|
"rewards/real": -7.515552520751953, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.2116162818376577e-07, |
|
"logits/generated": -1.4532592296600342, |
|
"logits/real": -1.466347336769104, |
|
"logps/generated": -653.7462158203125, |
|
"logps/real": -410.3941955566406, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.256484985351562, |
|
"rewards/margins": 19.569355010986328, |
|
"rewards/real": -7.687130928039551, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1997143537253036e-07, |
|
"logits/generated": -1.519970178604126, |
|
"logits/real": -1.6325457096099854, |
|
"logps/generated": -666.7601318359375, |
|
"logps/real": -440.3809509277344, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.8579044342041, |
|
"rewards/margins": 21.10441780090332, |
|
"rewards/real": -7.753486633300781, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1878124256129493e-07, |
|
"logits/generated": -1.404447317123413, |
|
"logits/real": -1.4850072860717773, |
|
"logps/generated": -646.6796875, |
|
"logps/real": -382.3814697265625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.619770050048828, |
|
"rewards/margins": 19.931453704833984, |
|
"rewards/real": -7.688315391540527, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.175910497500595e-07, |
|
"logits/generated": -1.465595006942749, |
|
"logits/real": -1.5086153745651245, |
|
"logps/generated": -713.1724243164062, |
|
"logps/real": -460.9185485839844, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.117782592773438, |
|
"rewards/margins": 21.637718200683594, |
|
"rewards/real": -8.480066299438477, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1640085693882409e-07, |
|
"logits/generated": -1.3850997686386108, |
|
"logits/real": -1.4538679122924805, |
|
"logps/generated": -767.3414306640625, |
|
"logps/real": -462.62030029296875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.52153396606445, |
|
"rewards/margins": 23.91811180114746, |
|
"rewards/real": -9.603418350219727, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1521066412758866e-07, |
|
"logits/generated": -1.4143835306167603, |
|
"logits/real": -1.5478150844573975, |
|
"logps/generated": -652.1123657226562, |
|
"logps/real": -392.4671630859375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.432504653930664, |
|
"rewards/margins": 21.238384246826172, |
|
"rewards/real": -9.194117546081543, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1402047131635324e-07, |
|
"logits/generated": -1.4064255952835083, |
|
"logits/real": -1.4616397619247437, |
|
"logps/generated": -666.3732299804688, |
|
"logps/real": -342.7879333496094, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.1993465423584, |
|
"rewards/margins": 22.13344955444336, |
|
"rewards/real": -7.0659003257751465, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1283027850511783e-07, |
|
"logits/generated": -1.3749884366989136, |
|
"logits/real": -1.4327641725540161, |
|
"logps/generated": -699.9730224609375, |
|
"logps/real": -428.4892578125, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -31.05487060546875, |
|
"rewards/margins": 22.448055267333984, |
|
"rewards/real": -8.606815338134766, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.116400856938824e-07, |
|
"logits/generated": -1.3594131469726562, |
|
"logits/real": -1.515339732170105, |
|
"logps/generated": -683.3230590820312, |
|
"logps/real": -435.6929626464844, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.349924087524414, |
|
"rewards/margins": 22.398181915283203, |
|
"rewards/real": -7.951746940612793, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1044989288264698e-07, |
|
"logits/generated": -1.3991297483444214, |
|
"logits/real": -1.5862958431243896, |
|
"logps/generated": -739.5543212890625, |
|
"logps/real": -410.45501708984375, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -30.497798919677734, |
|
"rewards/margins": 21.733530044555664, |
|
"rewards/real": -8.764264106750488, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0925970007141157e-07, |
|
"logits/generated": -1.4209661483764648, |
|
"logits/real": -1.554810643196106, |
|
"logps/generated": -690.9508056640625, |
|
"logps/real": -424.572265625, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.3098201751709, |
|
"rewards/margins": 19.752222061157227, |
|
"rewards/real": -8.557598114013672, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0806950726017615e-07, |
|
"logits/generated": -1.4447872638702393, |
|
"logits/real": -1.6798250675201416, |
|
"logps/generated": -628.6717529296875, |
|
"logps/real": -394.5265808105469, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.025936126708984, |
|
"rewards/margins": 20.235803604125977, |
|
"rewards/real": -7.790134429931641, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0687931444894072e-07, |
|
"logits/generated": -1.532257318496704, |
|
"logits/real": -1.6047290563583374, |
|
"logps/generated": -702.6226806640625, |
|
"logps/real": -396.7612609863281, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -30.113338470458984, |
|
"rewards/margins": 22.376953125, |
|
"rewards/real": -7.73638916015625, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.056891216377053e-07, |
|
"logits/generated": -1.4834333658218384, |
|
"logits/real": -1.5966551303863525, |
|
"logps/generated": -713.3619995117188, |
|
"logps/real": -440.79095458984375, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.308202743530273, |
|
"rewards/margins": 21.465810775756836, |
|
"rewards/real": -7.842390537261963, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0449892882646988e-07, |
|
"logits/generated": -1.524183988571167, |
|
"logits/real": -1.608907699584961, |
|
"logps/generated": -710.7420654296875, |
|
"logps/real": -489.75665283203125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.5823917388916, |
|
"rewards/margins": 21.41897201538086, |
|
"rewards/real": -8.163420677185059, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0330873601523445e-07, |
|
"logits/generated": -1.4193501472473145, |
|
"logits/real": -1.5115816593170166, |
|
"logps/generated": -760.3106689453125, |
|
"logps/real": -442.89898681640625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.945148468017578, |
|
"rewards/margins": 22.820213317871094, |
|
"rewards/real": -8.124935150146484, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0211854320399904e-07, |
|
"logits/generated": -1.339179277420044, |
|
"logits/real": -1.4539612531661987, |
|
"logps/generated": -684.4679565429688, |
|
"logps/real": -414.834716796875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.45599937438965, |
|
"rewards/margins": 22.370563507080078, |
|
"rewards/real": -8.085436820983887, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0092835039276362e-07, |
|
"logits/generated": -1.4860432147979736, |
|
"logits/real": -1.602423071861267, |
|
"logps/generated": -673.7472534179688, |
|
"logps/real": -382.69085693359375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.125503540039062, |
|
"rewards/margins": 23.682289123535156, |
|
"rewards/real": -6.443214416503906, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.973815758152821e-08, |
|
"logits/generated": -1.5302735567092896, |
|
"logits/real": -1.5981000661849976, |
|
"logps/generated": -623.1529541015625, |
|
"logps/real": -385.8594970703125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.0240478515625, |
|
"rewards/margins": 20.35898208618164, |
|
"rewards/real": -6.665063381195068, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.854796477029279e-08, |
|
"logits/generated": -1.557305932044983, |
|
"logits/real": -1.6370735168457031, |
|
"logps/generated": -734.8917846679688, |
|
"logps/real": -417.2500915527344, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.936962127685547, |
|
"rewards/margins": 23.65032386779785, |
|
"rewards/real": -7.286639213562012, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.735777195905736e-08, |
|
"logits/generated": -1.5963976383209229, |
|
"logits/real": -1.646400809288025, |
|
"logps/generated": -621.7203979492188, |
|
"logps/real": -350.34271240234375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.403676986694336, |
|
"rewards/margins": 20.56957244873047, |
|
"rewards/real": -6.834105491638184, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.616757914782195e-08, |
|
"logits/generated": -1.6388921737670898, |
|
"logits/real": -1.738226294517517, |
|
"logps/generated": -675.3317260742188, |
|
"logps/real": -426.2586975097656, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.32644271850586, |
|
"rewards/margins": 20.16423797607422, |
|
"rewards/real": -7.162204742431641, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.497738633658653e-08, |
|
"logits/generated": -1.472826600074768, |
|
"logits/real": -1.6667178869247437, |
|
"logps/generated": -635.9212036132812, |
|
"logps/real": -377.46685791015625, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.496978759765625, |
|
"rewards/margins": 20.11826515197754, |
|
"rewards/real": -7.3787126541137695, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.378719352535109e-08, |
|
"logits/generated": -1.475178837776184, |
|
"logits/real": -1.5875985622406006, |
|
"logps/generated": -704.7142944335938, |
|
"logps/real": -400.84521484375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.746139526367188, |
|
"rewards/margins": 20.98758316040039, |
|
"rewards/real": -8.758556365966797, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.259700071411568e-08, |
|
"logits/generated": -1.5485643148422241, |
|
"logits/real": -1.5439013242721558, |
|
"logps/generated": -741.1964111328125, |
|
"logps/real": -408.63330078125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.4178409576416, |
|
"rewards/margins": 23.00247573852539, |
|
"rewards/real": -7.415367126464844, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.140680790288026e-08, |
|
"logits/generated": -1.4215214252471924, |
|
"logits/real": -1.5626184940338135, |
|
"logps/generated": -667.1637573242188, |
|
"logps/real": -382.75238037109375, |
|
"loss": 0.007, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -29.500041961669922, |
|
"rewards/margins": 21.99424934387207, |
|
"rewards/real": -7.505797386169434, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.021661509164484e-08, |
|
"logits/generated": -1.4949233531951904, |
|
"logits/real": -1.5658118724822998, |
|
"logps/generated": -680.2150268554688, |
|
"logps/real": -470.9717712402344, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.54837989807129, |
|
"rewards/margins": 21.074321746826172, |
|
"rewards/real": -7.474058628082275, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.902642228040942e-08, |
|
"logits/generated": -1.4464380741119385, |
|
"logits/real": -1.5153313875198364, |
|
"logps/generated": -716.2271728515625, |
|
"logps/real": -413.4552307128906, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.981014251708984, |
|
"rewards/margins": 22.682100296020508, |
|
"rewards/real": -7.298914909362793, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.7836229469174e-08, |
|
"logits/generated": -1.4447425603866577, |
|
"logits/real": -1.56985342502594, |
|
"logps/generated": -757.6671142578125, |
|
"logps/real": -408.1660461425781, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.84424591064453, |
|
"rewards/margins": 24.225845336914062, |
|
"rewards/real": -8.618400573730469, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.664603665793858e-08, |
|
"logits/generated": -1.6285253763198853, |
|
"logits/real": -1.7806179523468018, |
|
"logps/generated": -676.8280029296875, |
|
"logps/real": -428.38995361328125, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.158214569091797, |
|
"rewards/margins": 20.443370819091797, |
|
"rewards/real": -6.714838981628418, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.545584384670317e-08, |
|
"logits/generated": -1.581805944442749, |
|
"logits/real": -1.6949933767318726, |
|
"logps/generated": -665.0941162109375, |
|
"logps/real": -385.68133544921875, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.212072372436523, |
|
"rewards/margins": 21.672542572021484, |
|
"rewards/real": -6.539525508880615, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.426565103546774e-08, |
|
"logits/generated": -1.4136435985565186, |
|
"logits/real": -1.5828819274902344, |
|
"logps/generated": -684.5712280273438, |
|
"logps/real": -426.00726318359375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.231287002563477, |
|
"rewards/margins": 20.253149032592773, |
|
"rewards/real": -6.978137016296387, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.307545822423233e-08, |
|
"logits/generated": -1.4507461786270142, |
|
"logits/real": -1.5602095127105713, |
|
"logps/generated": -665.6134033203125, |
|
"logps/real": -367.7745361328125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.6043701171875, |
|
"rewards/margins": 20.994644165039062, |
|
"rewards/real": -6.6097259521484375, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.18852654129969e-08, |
|
"logits/generated": -1.6215112209320068, |
|
"logits/real": -1.6469926834106445, |
|
"logps/generated": -666.5162963867188, |
|
"logps/real": -402.77227783203125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.933940887451172, |
|
"rewards/margins": 20.598087310791016, |
|
"rewards/real": -7.335852146148682, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.069507260176147e-08, |
|
"logits/generated": -1.5119495391845703, |
|
"logits/real": -1.6007074117660522, |
|
"logps/generated": -637.7647094726562, |
|
"logps/real": -355.0984802246094, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.141719818115234, |
|
"rewards/margins": 20.317394256591797, |
|
"rewards/real": -6.8243231773376465, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.950487979052606e-08, |
|
"logits/generated": -1.5609657764434814, |
|
"logits/real": -1.6627038717269897, |
|
"logps/generated": -708.6666259765625, |
|
"logps/real": -415.657470703125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.411911010742188, |
|
"rewards/margins": 22.86978530883789, |
|
"rewards/real": -7.542126655578613, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.831468697929064e-08, |
|
"logits/generated": -1.4143073558807373, |
|
"logits/real": -1.4475321769714355, |
|
"logps/generated": -650.6522216796875, |
|
"logps/real": -392.54345703125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.745223999023438, |
|
"rewards/margins": 20.270931243896484, |
|
"rewards/real": -5.474294185638428, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.712449416805522e-08, |
|
"logits/generated": -1.4251785278320312, |
|
"logits/real": -1.529900074005127, |
|
"logps/generated": -684.9102172851562, |
|
"logps/real": -411.81207275390625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.357402801513672, |
|
"rewards/margins": 19.91077995300293, |
|
"rewards/real": -6.4466233253479, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.59343013568198e-08, |
|
"logits/generated": -1.4817150831222534, |
|
"logits/real": -1.5895212888717651, |
|
"logps/generated": -647.0018310546875, |
|
"logps/real": -350.5855407714844, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.597564697265625, |
|
"rewards/margins": 23.217004776000977, |
|
"rewards/real": -6.38055944442749, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.474410854558438e-08, |
|
"logits/generated": -1.5963512659072876, |
|
"logits/real": -1.637025237083435, |
|
"logps/generated": -596.042724609375, |
|
"logps/real": -327.88970947265625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.45895767211914, |
|
"rewards/margins": 19.825878143310547, |
|
"rewards/real": -6.633078098297119, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.355391573434896e-08, |
|
"logits/generated": -1.638891577720642, |
|
"logits/real": -1.7146186828613281, |
|
"logps/generated": -687.0392456054688, |
|
"logps/real": -365.47528076171875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.7447509765625, |
|
"rewards/margins": 20.9066219329834, |
|
"rewards/real": -6.838125705718994, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.236372292311355e-08, |
|
"logits/generated": -1.4594347476959229, |
|
"logits/real": -1.6632425785064697, |
|
"logps/generated": -696.3617553710938, |
|
"logps/real": -408.8171081542969, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.696395874023438, |
|
"rewards/margins": 20.56673812866211, |
|
"rewards/real": -7.129660129547119, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.117353011187813e-08, |
|
"logits/generated": -1.4063997268676758, |
|
"logits/real": -1.4948168992996216, |
|
"logps/generated": -661.9642333984375, |
|
"logps/real": -434.97442626953125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.784564971923828, |
|
"rewards/margins": 17.620466232299805, |
|
"rewards/real": -8.164094924926758, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.998333730064269e-08, |
|
"logits/generated": -1.438319444656372, |
|
"logits/real": -1.5320520401000977, |
|
"logps/generated": -679.9823608398438, |
|
"logps/real": -425.42413330078125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.671300888061523, |
|
"rewards/margins": 21.72645378112793, |
|
"rewards/real": -6.944846153259277, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.879314448940728e-08, |
|
"logits/generated": -1.365595817565918, |
|
"logits/real": -1.505392074584961, |
|
"logps/generated": -624.4268798828125, |
|
"logps/real": -387.62554931640625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.39801597595215, |
|
"rewards/margins": 20.606674194335938, |
|
"rewards/real": -6.791341304779053, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.760295167817185e-08, |
|
"logits/generated": -1.6268279552459717, |
|
"logits/real": -1.6268571615219116, |
|
"logps/generated": -725.8529052734375, |
|
"logps/real": -467.90618896484375, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.912384033203125, |
|
"rewards/margins": 20.454193115234375, |
|
"rewards/real": -6.45819616317749, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.641275886693644e-08, |
|
"logits/generated": -1.4818575382232666, |
|
"logits/real": -1.5495567321777344, |
|
"logps/generated": -605.5071411132812, |
|
"logps/real": -372.8101501464844, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.420181274414062, |
|
"rewards/margins": 19.029685974121094, |
|
"rewards/real": -7.390494346618652, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.522256605570102e-08, |
|
"logits/generated": -1.5707738399505615, |
|
"logits/real": -1.6774705648422241, |
|
"logps/generated": -636.1964111328125, |
|
"logps/real": -391.5029602050781, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.28677749633789, |
|
"rewards/margins": 21.367889404296875, |
|
"rewards/real": -5.918887138366699, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.40323732444656e-08, |
|
"logits/generated": -1.545809030532837, |
|
"logits/real": -1.5913245677947998, |
|
"logps/generated": -630.56640625, |
|
"logps/real": -422.04205322265625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.0881404876709, |
|
"rewards/margins": 18.995553970336914, |
|
"rewards/real": -5.092586040496826, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.284218043323019e-08, |
|
"logits/generated": -1.59109365940094, |
|
"logits/real": -1.6737741231918335, |
|
"logps/generated": -642.7061157226562, |
|
"logps/real": -410.7779235839844, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.49496841430664, |
|
"rewards/margins": 19.26400375366211, |
|
"rewards/real": -6.230964660644531, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.165198762199476e-08, |
|
"logits/generated": -1.633514404296875, |
|
"logits/real": -1.619410514831543, |
|
"logps/generated": -639.6582641601562, |
|
"logps/real": -386.4176330566406, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.464160919189453, |
|
"rewards/margins": 18.57771110534668, |
|
"rewards/real": -5.886451721191406, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.046179481075934e-08, |
|
"logits/generated": -1.4455702304840088, |
|
"logits/real": -1.7037875652313232, |
|
"logps/generated": -664.431640625, |
|
"logps/real": -400.7237548828125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.038738250732422, |
|
"rewards/margins": 19.530742645263672, |
|
"rewards/real": -5.507995128631592, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.9271601999523916e-08, |
|
"logits/generated": -1.495924949645996, |
|
"logits/real": -1.673275351524353, |
|
"logps/generated": -645.5086669921875, |
|
"logps/real": -383.1039123535156, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.089937210083008, |
|
"rewards/margins": 19.57440185546875, |
|
"rewards/real": -5.515534400939941, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.80814091882885e-08, |
|
"logits/generated": -1.43355393409729, |
|
"logits/real": -1.5794459581375122, |
|
"logps/generated": -708.1492309570312, |
|
"logps/real": -422.21917724609375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.978546142578125, |
|
"rewards/margins": 21.33310317993164, |
|
"rewards/real": -5.645444393157959, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.689121637705308e-08, |
|
"logits/generated": -1.4713923931121826, |
|
"logits/real": -1.6138818264007568, |
|
"logps/generated": -568.523193359375, |
|
"logps/real": -390.0688171386719, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.389972686767578, |
|
"rewards/margins": 17.943958282470703, |
|
"rewards/real": -5.446010589599609, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.5701023565817666e-08, |
|
"logits/generated": -1.5411484241485596, |
|
"logits/real": -1.5925050973892212, |
|
"logps/generated": -566.3245849609375, |
|
"logps/real": -389.99395751953125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.01774787902832, |
|
"rewards/margins": 18.14521598815918, |
|
"rewards/real": -4.872531890869141, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4510830754582236e-08, |
|
"logits/generated": -1.5211843252182007, |
|
"logits/real": -1.6060224771499634, |
|
"logps/generated": -618.6573486328125, |
|
"logps/real": -381.83154296875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.276790618896484, |
|
"rewards/margins": 20.00619888305664, |
|
"rewards/real": -5.270589351654053, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.332063794334682e-08, |
|
"logits/generated": -1.4857370853424072, |
|
"logits/real": -1.5991318225860596, |
|
"logps/generated": -628.9978637695312, |
|
"logps/real": -368.8316955566406, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.434362411499023, |
|
"rewards/margins": 20.840261459350586, |
|
"rewards/real": -4.59410285949707, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.21304451321114e-08, |
|
"logits/generated": -1.481233835220337, |
|
"logits/real": -1.616990089416504, |
|
"logps/generated": -612.8431396484375, |
|
"logps/real": -386.7208557128906, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.88302993774414, |
|
"rewards/margins": 19.562503814697266, |
|
"rewards/real": -4.32052755355835, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.0940252320875985e-08, |
|
"logits/generated": -1.543163537979126, |
|
"logits/real": -1.64615797996521, |
|
"logps/generated": -642.1202392578125, |
|
"logps/real": -387.37042236328125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.703163146972656, |
|
"rewards/margins": 18.479257583618164, |
|
"rewards/real": -4.223905086517334, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.975005950964056e-08, |
|
"logits/generated": -1.5782445669174194, |
|
"logits/real": -1.6280380487442017, |
|
"logps/generated": -670.2909545898438, |
|
"logps/real": -409.8173828125, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.616764068603516, |
|
"rewards/margins": 20.43697738647461, |
|
"rewards/real": -5.1797871589660645, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.855986669840514e-08, |
|
"logits/generated": -1.5900815725326538, |
|
"logits/real": -1.621788740158081, |
|
"logps/generated": -621.515625, |
|
"logps/real": -400.93658447265625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.718387603759766, |
|
"rewards/margins": 18.437381744384766, |
|
"rewards/real": -6.281005859375, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.736967388716972e-08, |
|
"logits/generated": -1.5760804414749146, |
|
"logits/real": -1.6673088073730469, |
|
"logps/generated": -629.8677978515625, |
|
"logps/real": -367.3181457519531, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.90537452697754, |
|
"rewards/margins": 19.712932586669922, |
|
"rewards/real": -6.192440509796143, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.61794810759343e-08, |
|
"logits/generated": -1.5383670330047607, |
|
"logits/real": -1.6774461269378662, |
|
"logps/generated": -668.8092041015625, |
|
"logps/real": -413.6837463378906, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.660079956054688, |
|
"rewards/margins": 20.008989334106445, |
|
"rewards/real": -4.65109395980835, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.498928826469888e-08, |
|
"logits/generated": -1.516428828239441, |
|
"logits/real": -1.6732898950576782, |
|
"logps/generated": -604.8695068359375, |
|
"logps/real": -424.62481689453125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.037944793701172, |
|
"rewards/margins": 18.540569305419922, |
|
"rewards/real": -4.497374534606934, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.3799095453463464e-08, |
|
"logits/generated": -1.5560497045516968, |
|
"logits/real": -1.6302626132965088, |
|
"logps/generated": -725.353271484375, |
|
"logps/real": -401.71990966796875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.72428321838379, |
|
"rewards/margins": 20.166976928710938, |
|
"rewards/real": -5.557308197021484, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2608902642228033e-08, |
|
"logits/generated": -1.5148546695709229, |
|
"logits/real": -1.635724425315857, |
|
"logps/generated": -560.277587890625, |
|
"logps/real": -350.01739501953125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.92128562927246, |
|
"rewards/margins": 17.64234733581543, |
|
"rewards/real": -5.2789411544799805, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.1418709830992617e-08, |
|
"logits/generated": -1.5678379535675049, |
|
"logits/real": -1.6492674350738525, |
|
"logps/generated": -651.1310424804688, |
|
"logps/real": -392.8611755371094, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.585779190063477, |
|
"rewards/margins": 17.906591415405273, |
|
"rewards/real": -5.679187297821045, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.02285170197572e-08, |
|
"logits/generated": -1.5977767705917358, |
|
"logits/real": -1.6969823837280273, |
|
"logps/generated": -596.7791748046875, |
|
"logps/real": -366.9782409667969, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.06852149963379, |
|
"rewards/margins": 18.731252670288086, |
|
"rewards/real": -5.3372673988342285, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903832420852178e-08, |
|
"logits/generated": -1.5980104207992554, |
|
"logits/real": -1.624751329421997, |
|
"logps/generated": -634.3136596679688, |
|
"logps/real": -421.5874938964844, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.012256622314453, |
|
"rewards/margins": 19.190711975097656, |
|
"rewards/real": -5.82154655456543, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.784813139728636e-08, |
|
"logits/generated": -1.544721007347107, |
|
"logits/real": -1.617582082748413, |
|
"logps/generated": -652.9552001953125, |
|
"logps/real": -342.71917724609375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -27.350351333618164, |
|
"rewards/margins": 22.945491790771484, |
|
"rewards/real": -4.404857635498047, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.6657938586050936e-08, |
|
"logits/generated": -1.4935457706451416, |
|
"logits/real": -1.6021337509155273, |
|
"logps/generated": -588.6372680664062, |
|
"logps/real": -402.85235595703125, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.209640502929688, |
|
"rewards/margins": 19.01520347595215, |
|
"rewards/real": -4.19443416595459, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.546774577481552e-08, |
|
"logits/generated": -1.4029728174209595, |
|
"logits/real": -1.4847666025161743, |
|
"logps/generated": -645.6001586914062, |
|
"logps/real": -414.8529357910156, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.182071685791016, |
|
"rewards/margins": 19.215055465698242, |
|
"rewards/real": -5.967015266418457, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.42775529635801e-08, |
|
"logits/generated": -1.5726209878921509, |
|
"logits/real": -1.7626521587371826, |
|
"logps/generated": -664.4998779296875, |
|
"logps/real": -424.19140625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.57777976989746, |
|
"rewards/margins": 19.955883026123047, |
|
"rewards/real": -4.621894359588623, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.308736015234468e-08, |
|
"logits/generated": -1.5751293897628784, |
|
"logits/real": -1.6218827962875366, |
|
"logps/generated": -606.050048828125, |
|
"logps/real": -415.70599365234375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.84659767150879, |
|
"rewards/margins": 18.291515350341797, |
|
"rewards/real": -5.555081844329834, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.189716734110926e-08, |
|
"logits/generated": -1.5750287771224976, |
|
"logits/real": -1.6524326801300049, |
|
"logps/generated": -662.5819702148438, |
|
"logps/real": -409.240966796875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.22734832763672, |
|
"rewards/margins": 19.554428100585938, |
|
"rewards/real": -4.6729207038879395, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.070697452987384e-08, |
|
"logits/generated": -1.434731125831604, |
|
"logits/real": -1.5952690839767456, |
|
"logps/generated": -627.177001953125, |
|
"logps/real": -401.0657653808594, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.574573516845703, |
|
"rewards/margins": 19.57662582397461, |
|
"rewards/real": -4.99794864654541, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9516781718638418e-08, |
|
"logits/generated": -1.5248663425445557, |
|
"logits/real": -1.643877387046814, |
|
"logps/generated": -664.0775146484375, |
|
"logps/real": -432.7522888183594, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.960874557495117, |
|
"rewards/margins": 19.798603057861328, |
|
"rewards/real": -5.162272930145264, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.8326588907402998e-08, |
|
"logits/generated": -1.6145492792129517, |
|
"logits/real": -1.6489204168319702, |
|
"logps/generated": -638.4390869140625, |
|
"logps/real": -405.95135498046875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.591712951660156, |
|
"rewards/margins": 19.004825592041016, |
|
"rewards/real": -5.586886882781982, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7136396096167577e-08, |
|
"logits/generated": -1.5042235851287842, |
|
"logits/real": -1.6028741598129272, |
|
"logps/generated": -621.9290161132812, |
|
"logps/real": -364.36456298828125, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.292139053344727, |
|
"rewards/margins": 20.4296817779541, |
|
"rewards/real": -5.862456321716309, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.5946203284932157e-08, |
|
"logits/generated": -1.5380438566207886, |
|
"logits/real": -1.6364552974700928, |
|
"logps/generated": -618.8900756835938, |
|
"logps/real": -363.16387939453125, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.624120712280273, |
|
"rewards/margins": 20.857669830322266, |
|
"rewards/real": -5.766448974609375, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.475601047369674e-08, |
|
"logits/generated": -1.3604224920272827, |
|
"logits/real": -1.5043797492980957, |
|
"logps/generated": -601.212158203125, |
|
"logps/real": -355.81329345703125, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -25.482114791870117, |
|
"rewards/margins": 20.78329086303711, |
|
"rewards/real": -4.698822021484375, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3565817662461317e-08, |
|
"logits/generated": -1.4521681070327759, |
|
"logits/real": -1.6199442148208618, |
|
"logps/generated": -686.1697387695312, |
|
"logps/real": -380.40020751953125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.074283599853516, |
|
"rewards/margins": 22.777790069580078, |
|
"rewards/real": -5.296494007110596, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2375624851225897e-08, |
|
"logits/generated": -1.5777462720870972, |
|
"logits/real": -1.625754714012146, |
|
"logps/generated": -577.4441528320312, |
|
"logps/real": -390.99676513671875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.376705169677734, |
|
"rewards/margins": 17.67047119140625, |
|
"rewards/real": -4.706236839294434, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.1185432039990476e-08, |
|
"logits/generated": -1.5715150833129883, |
|
"logits/real": -1.6618340015411377, |
|
"logps/generated": -650.3167724609375, |
|
"logps/real": -370.7663269042969, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.569076538085938, |
|
"rewards/margins": 20.809871673583984, |
|
"rewards/real": -5.7592034339904785, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9995239228755056e-08, |
|
"logits/generated": -1.4017701148986816, |
|
"logits/real": -1.5704150199890137, |
|
"logps/generated": -623.8563232421875, |
|
"logps/real": -330.9757080078125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.09404945373535, |
|
"rewards/margins": 21.123863220214844, |
|
"rewards/real": -5.970187187194824, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.880504641751964e-08, |
|
"logits/generated": -1.569045066833496, |
|
"logits/real": -1.689866065979004, |
|
"logps/generated": -648.3955078125, |
|
"logps/real": -410.296630859375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.370418548583984, |
|
"rewards/margins": 19.45819091796875, |
|
"rewards/real": -4.912228584289551, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7614853606284216e-08, |
|
"logits/generated": -1.5325770378112793, |
|
"logits/real": -1.65109384059906, |
|
"logps/generated": -704.2975463867188, |
|
"logps/real": -418.112060546875, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -26.44858169555664, |
|
"rewards/margins": 21.82914161682129, |
|
"rewards/real": -4.619443893432617, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.64246607950488e-08, |
|
"logits/generated": -1.569603681564331, |
|
"logits/real": -1.6199992895126343, |
|
"logps/generated": -633.3504028320312, |
|
"logps/real": -405.2867126464844, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.51732635498047, |
|
"rewards/margins": 19.26373863220215, |
|
"rewards/real": -5.253589153289795, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.523446798381338e-08, |
|
"logits/generated": -1.538140892982483, |
|
"logits/real": -1.5134851932525635, |
|
"logps/generated": -599.3333740234375, |
|
"logps/real": -339.6556091308594, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.23720932006836, |
|
"rewards/margins": 20.19225311279297, |
|
"rewards/real": -6.044954299926758, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4044275172577957e-08, |
|
"logits/generated": -1.576467752456665, |
|
"logits/real": -1.7410484552383423, |
|
"logps/generated": -633.7058715820312, |
|
"logps/real": -355.918212890625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.02840232849121, |
|
"rewards/margins": 21.529172897338867, |
|
"rewards/real": -4.499229907989502, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2854082361342537e-08, |
|
"logits/generated": -1.480687141418457, |
|
"logits/real": -1.6588733196258545, |
|
"logps/generated": -614.5526733398438, |
|
"logps/real": -412.12744140625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.828638076782227, |
|
"rewards/margins": 19.214387893676758, |
|
"rewards/real": -5.614253997802734, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1663889550107118e-08, |
|
"logits/generated": -1.5496861934661865, |
|
"logits/real": -1.7427030801773071, |
|
"logps/generated": -634.1248168945312, |
|
"logps/real": -392.64141845703125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.263072967529297, |
|
"rewards/margins": 21.099870681762695, |
|
"rewards/real": -5.163203716278076, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0473696738871698e-08, |
|
"logits/generated": -1.5476195812225342, |
|
"logits/real": -1.6109368801116943, |
|
"logps/generated": -661.4473876953125, |
|
"logps/real": -491.182373046875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.115245819091797, |
|
"rewards/margins": 19.50821304321289, |
|
"rewards/real": -6.607035160064697, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.283503927636276e-09, |
|
"logits/generated": -1.514695405960083, |
|
"logits/real": -1.6349430084228516, |
|
"logps/generated": -645.2971801757812, |
|
"logps/real": -423.22979736328125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.907445907592773, |
|
"rewards/margins": 20.990873336791992, |
|
"rewards/real": -4.916577339172363, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.093311116400856e-09, |
|
"logits/generated": -1.4776127338409424, |
|
"logits/real": -1.5790631771087646, |
|
"logps/generated": -644.6243286132812, |
|
"logps/real": -405.47015380859375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.59290885925293, |
|
"rewards/margins": 19.970502853393555, |
|
"rewards/real": -5.6224045753479, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.903118305165436e-09, |
|
"logits/generated": -1.551004409790039, |
|
"logits/real": -1.6629527807235718, |
|
"logps/generated": -686.5955810546875, |
|
"logps/real": -435.12750244140625, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.463531494140625, |
|
"rewards/margins": 19.223169326782227, |
|
"rewards/real": -6.240363597869873, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.712925493930016e-09, |
|
"logits/generated": -1.4893418550491333, |
|
"logits/real": -1.675402283668518, |
|
"logps/generated": -659.8289184570312, |
|
"logps/real": -389.3727722167969, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.647533416748047, |
|
"rewards/margins": 21.779216766357422, |
|
"rewards/real": -5.868315696716309, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.522732682694597e-09, |
|
"logits/generated": -1.5567461252212524, |
|
"logits/real": -1.7037324905395508, |
|
"logps/generated": -679.4031982421875, |
|
"logps/real": -418.8092346191406, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.965381622314453, |
|
"rewards/margins": 18.415084838867188, |
|
"rewards/real": -5.550297737121582, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.332539871459176e-09, |
|
"logits/generated": -1.3903148174285889, |
|
"logits/real": -1.492148756980896, |
|
"logps/generated": -663.8366088867188, |
|
"logps/real": -389.20098876953125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.28782081604004, |
|
"rewards/margins": 21.74622344970703, |
|
"rewards/real": -5.541598796844482, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.1423470602237564e-09, |
|
"logits/generated": -1.5899990797042847, |
|
"logits/real": -1.632591962814331, |
|
"logps/generated": -686.7926025390625, |
|
"logps/real": -420.3905334472656, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.265411376953125, |
|
"rewards/margins": 21.27739143371582, |
|
"rewards/real": -4.988020420074463, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.521542489883362e-10, |
|
"logits/generated": -1.5567301511764526, |
|
"logits/real": -1.6518385410308838, |
|
"logps/generated": -666.6358642578125, |
|
"logps/real": -394.54815673828125, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.553237915039062, |
|
"rewards/margins": 22.035541534423828, |
|
"rewards/real": -4.517697811126709, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4668, |
|
"total_flos": 0.0, |
|
"train_loss": 0.04980033338522684, |
|
"train_runtime": 39160.4052, |
|
"train_samples_per_second": 3.814, |
|
"train_steps_per_second": 0.119 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4668, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|