|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.99968, |
|
"eval_steps": 300, |
|
"global_step": 1562, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.1847133757961784e-09, |
|
"logits/generated": 0.17403794825077057, |
|
"logits/real": -0.19873479008674622, |
|
"logps/generated": -514.6786499023438, |
|
"logps/real": -454.4227294921875, |
|
"loss": 0.15, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.592356687898089e-08, |
|
"logits/generated": 0.1870264708995819, |
|
"logits/real": -0.25900763273239136, |
|
"logps/generated": -526.2941284179688, |
|
"logps/real": -490.1958923339844, |
|
"loss": 0.1507, |
|
"rewards/accuracies": 0.3984375, |
|
"rewards/generated": -0.0001907353289425373, |
|
"rewards/margins": 0.0010350469965487719, |
|
"rewards/real": 0.0008443119004368782, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.184713375796178e-08, |
|
"logits/generated": 0.1984216719865799, |
|
"logits/real": -0.26348429918289185, |
|
"logps/generated": -505.8414611816406, |
|
"logps/real": -491.9839782714844, |
|
"loss": 0.151, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/generated": 0.006021722219884396, |
|
"rewards/margins": -0.0053602345287799835, |
|
"rewards/real": 0.000661488447804004, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.777070063694268e-08, |
|
"logits/generated": 0.17304742336273193, |
|
"logits/real": -0.21200346946716309, |
|
"logps/generated": -538.6023559570312, |
|
"logps/real": -442.2162170410156, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/generated": 0.0030143880285322666, |
|
"rewards/margins": -0.0018971532117575407, |
|
"rewards/real": 0.0011172344675287604, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.369426751592356e-08, |
|
"logits/generated": 0.1914023607969284, |
|
"logits/real": -0.22652260959148407, |
|
"logps/generated": -523.87451171875, |
|
"logps/real": -461.0634765625, |
|
"loss": 0.1504, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/generated": 0.0033149768132716417, |
|
"rewards/margins": 0.004283360205590725, |
|
"rewards/real": 0.007598336786031723, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.961783439490445e-08, |
|
"logits/generated": 0.18474087119102478, |
|
"logits/real": -0.2507132589817047, |
|
"logps/generated": -501.36993408203125, |
|
"logps/real": -479.93634033203125, |
|
"loss": 0.151, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/generated": 0.010729875415563583, |
|
"rewards/margins": -0.005274372640997171, |
|
"rewards/real": 0.005455502774566412, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.554140127388536e-08, |
|
"logits/generated": 0.20316779613494873, |
|
"logits/real": -0.21709361672401428, |
|
"logps/generated": -534.5445556640625, |
|
"logps/real": -470.92254638671875, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/generated": 0.005789478309452534, |
|
"rewards/margins": -0.00014353431470226496, |
|
"rewards/real": 0.0056459433399140835, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1146496815286624e-07, |
|
"logits/generated": 0.1715896874666214, |
|
"logits/real": -0.24265193939208984, |
|
"logps/generated": -517.3651123046875, |
|
"logps/real": -451.755615234375, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/generated": -0.0016872085398063064, |
|
"rewards/margins": 0.008004234172403812, |
|
"rewards/real": 0.006317025516182184, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2738853503184713e-07, |
|
"logits/generated": 0.15530803799629211, |
|
"logits/real": -0.23413880169391632, |
|
"logps/generated": -523.96826171875, |
|
"logps/real": -444.64239501953125, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/generated": 0.0005639827577397227, |
|
"rewards/margins": 0.007568719331175089, |
|
"rewards/real": 0.008132701739668846, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.43312101910828e-07, |
|
"logits/generated": 0.1671060025691986, |
|
"logits/real": -0.22391800582408905, |
|
"logps/generated": -550.3038330078125, |
|
"logps/real": -439.77947998046875, |
|
"loss": 0.1505, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/generated": 0.004643476102501154, |
|
"rewards/margins": 0.001223881496116519, |
|
"rewards/real": 0.005867358297109604, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.592356687898089e-07, |
|
"logits/generated": 0.17757461965084076, |
|
"logits/real": -0.24453218281269073, |
|
"logps/generated": -519.4007568359375, |
|
"logps/real": -458.79669189453125, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/generated": -0.00017246263450942934, |
|
"rewards/margins": 0.0037554509472101927, |
|
"rewards/real": 0.0035829886328428984, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7515923566878978e-07, |
|
"logits/generated": 0.18612250685691833, |
|
"logits/real": -0.21440061926841736, |
|
"logps/generated": -539.6683959960938, |
|
"logps/real": -446.758056640625, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/generated": 0.002787713659927249, |
|
"rewards/margins": 0.008473692461848259, |
|
"rewards/real": 0.011261406354606152, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9108280254777072e-07, |
|
"logits/generated": 0.169862300157547, |
|
"logits/real": -0.22989757359027863, |
|
"logps/generated": -498.18560791015625, |
|
"logps/real": -442.7648010253906, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/generated": -0.012974532321095467, |
|
"rewards/margins": 0.017906008288264275, |
|
"rewards/real": 0.004931476432830095, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.070063694267516e-07, |
|
"logits/generated": 0.19755002856254578, |
|
"logits/real": -0.23432913422584534, |
|
"logps/generated": -492.85595703125, |
|
"logps/real": -494.6519470214844, |
|
"loss": 0.1498, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/generated": -0.0050980569794774055, |
|
"rewards/margins": 0.018167994916439056, |
|
"rewards/real": 0.01306993793696165, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2292993630573247e-07, |
|
"logits/generated": 0.16374804079532623, |
|
"logits/real": -0.25221407413482666, |
|
"logps/generated": -521.1048583984375, |
|
"logps/real": -445.358642578125, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/generated": -0.0037274628411978483, |
|
"rewards/margins": 0.011061501689255238, |
|
"rewards/real": 0.007334038615226746, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.388535031847134e-07, |
|
"logits/generated": 0.18414834141731262, |
|
"logits/real": -0.23798434436321259, |
|
"logps/generated": -539.92138671875, |
|
"logps/real": -464.05462646484375, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/generated": -0.008644538931548595, |
|
"rewards/margins": 0.01328897662460804, |
|
"rewards/real": 0.00464443676173687, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5477707006369425e-07, |
|
"logits/generated": 0.18885524570941925, |
|
"logits/real": -0.2001829594373703, |
|
"logps/generated": -549.0367431640625, |
|
"logps/real": -452.0292053222656, |
|
"loss": 0.1493, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/generated": -0.005654294043779373, |
|
"rewards/margins": 0.023647544905543327, |
|
"rewards/real": 0.017993250861763954, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7070063694267513e-07, |
|
"logits/generated": 0.16570258140563965, |
|
"logits/real": -0.22671516239643097, |
|
"logps/generated": -527.3927001953125, |
|
"logps/real": -434.76971435546875, |
|
"loss": 0.1495, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/generated": -0.009726697579026222, |
|
"rewards/margins": 0.026334956288337708, |
|
"rewards/real": 0.016608258709311485, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.86624203821656e-07, |
|
"logits/generated": 0.16409741342067719, |
|
"logits/real": -0.2301245927810669, |
|
"logps/generated": -514.0899658203125, |
|
"logps/real": -469.2464904785156, |
|
"loss": 0.1489, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/generated": -0.008600350469350815, |
|
"rewards/margins": 0.033345777541399, |
|
"rewards/real": 0.024745427072048187, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0254777070063694e-07, |
|
"logits/generated": 0.18098126351833344, |
|
"logits/real": -0.19588425755500793, |
|
"logps/generated": -511.7598571777344, |
|
"logps/real": -406.40911865234375, |
|
"loss": 0.1488, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/generated": -0.009649967774748802, |
|
"rewards/margins": 0.03349680081009865, |
|
"rewards/real": 0.023846831172704697, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.184713375796178e-07, |
|
"logits/generated": 0.17149221897125244, |
|
"logits/real": -0.24450810253620148, |
|
"logps/generated": -530.5223388671875, |
|
"logps/real": -459.1062927246094, |
|
"loss": 0.1487, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/generated": -0.014194811694324017, |
|
"rewards/margins": 0.03900580853223801, |
|
"rewards/real": 0.024810992181301117, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.343949044585987e-07, |
|
"logits/generated": 0.19177064299583435, |
|
"logits/real": -0.1881466656923294, |
|
"logps/generated": -520.2600708007812, |
|
"logps/real": -438.0816345214844, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/generated": -0.01024587918072939, |
|
"rewards/margins": 0.04250962659716606, |
|
"rewards/real": 0.032263752073049545, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5031847133757957e-07, |
|
"logits/generated": 0.16849389672279358, |
|
"logits/real": -0.2514097988605499, |
|
"logps/generated": -541.0388793945312, |
|
"logps/real": -480.9339904785156, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/generated": -0.008990215137600899, |
|
"rewards/margins": 0.041446976363658905, |
|
"rewards/real": 0.03245675936341286, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6624203821656045e-07, |
|
"logits/generated": 0.19658248126506805, |
|
"logits/real": -0.2052055150270462, |
|
"logps/generated": -525.2179565429688, |
|
"logps/real": -448.5086364746094, |
|
"loss": 0.1479, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/generated": -0.014174744486808777, |
|
"rewards/margins": 0.050183337181806564, |
|
"rewards/real": 0.03600858896970749, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8216560509554143e-07, |
|
"logits/generated": 0.1979866474866867, |
|
"logits/real": -0.24727025628089905, |
|
"logps/generated": -540.0928955078125, |
|
"logps/real": -480.67047119140625, |
|
"loss": 0.1477, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/generated": -0.016895266249775887, |
|
"rewards/margins": 0.0549207404255867, |
|
"rewards/real": 0.03802547603845596, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.980891719745223e-07, |
|
"logits/generated": 0.21044392883777618, |
|
"logits/real": -0.2199120819568634, |
|
"logps/generated": -508.4944763183594, |
|
"logps/real": -454.1514587402344, |
|
"loss": 0.1476, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/generated": -0.013310949318110943, |
|
"rewards/margins": 0.055444687604904175, |
|
"rewards/real": 0.042133744806051254, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.140127388535032e-07, |
|
"logits/generated": 0.19091640412807465, |
|
"logits/real": -0.23178955912590027, |
|
"logps/generated": -527.3304443359375, |
|
"logps/real": -436.0814514160156, |
|
"loss": 0.1475, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/generated": -0.013830204494297504, |
|
"rewards/margins": 0.05727874115109444, |
|
"rewards/real": 0.04344853386282921, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.2993630573248406e-07, |
|
"logits/generated": 0.17117801308631897, |
|
"logits/real": -0.2260015904903412, |
|
"logps/generated": -530.6170654296875, |
|
"logps/real": -450.876220703125, |
|
"loss": 0.1466, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.02331192046403885, |
|
"rewards/margins": 0.07541900128126144, |
|
"rewards/real": 0.052107084542512894, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4585987261146494e-07, |
|
"logits/generated": 0.19855618476867676, |
|
"logits/real": -0.23370346426963806, |
|
"logps/generated": -541.4176025390625, |
|
"logps/real": -485.81463623046875, |
|
"loss": 0.146, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/generated": -0.01844293251633644, |
|
"rewards/margins": 0.08036132156848907, |
|
"rewards/real": 0.061918385326862335, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6178343949044587e-07, |
|
"logits/generated": 0.16755443811416626, |
|
"logits/real": -0.24783170223236084, |
|
"logps/generated": -502.64849853515625, |
|
"logps/real": -458.10296630859375, |
|
"loss": 0.1461, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/generated": -0.004401802085340023, |
|
"rewards/margins": 0.06796350330114365, |
|
"rewards/real": 0.06356170028448105, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.777070063694267e-07, |
|
"logits/generated": 0.17052355408668518, |
|
"logits/real": -0.26604199409484863, |
|
"logps/generated": -535.1319580078125, |
|
"logps/real": -480.93682861328125, |
|
"loss": 0.1451, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -0.021760020405054092, |
|
"rewards/margins": 0.09478209912776947, |
|
"rewards/real": 0.07302206754684448, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.936305732484076e-07, |
|
"logits/generated": 0.18140792846679688, |
|
"logits/real": -0.24586549401283264, |
|
"logps/generated": -495.56951904296875, |
|
"logps/real": -445.80426025390625, |
|
"loss": 0.1446, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/generated": -0.010536590591073036, |
|
"rewards/margins": 0.09246022999286652, |
|
"rewards/real": 0.08192362636327744, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.98932384341637e-07, |
|
"logits/generated": 0.18911007046699524, |
|
"logits/real": -0.2553741931915283, |
|
"logps/generated": -516.3770751953125, |
|
"logps/real": -471.3077087402344, |
|
"loss": 0.1446, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/generated": -0.018946032971143723, |
|
"rewards/margins": 0.10126407444477081, |
|
"rewards/real": 0.0823180302977562, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.97153024911032e-07, |
|
"logits/generated": 0.19667308032512665, |
|
"logits/real": -0.15881827473640442, |
|
"logps/generated": -534.4208374023438, |
|
"logps/real": -442.3114318847656, |
|
"loss": 0.1441, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/generated": -0.01733066886663437, |
|
"rewards/margins": 0.10644586384296417, |
|
"rewards/real": 0.0891151949763298, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.953736654804271e-07, |
|
"logits/generated": 0.17530401051044464, |
|
"logits/real": -0.19269545376300812, |
|
"logps/generated": -517.9406127929688, |
|
"logps/real": -430.65789794921875, |
|
"loss": 0.1435, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -0.014586791396141052, |
|
"rewards/margins": 0.11395077407360077, |
|
"rewards/real": 0.09936399012804031, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.935943060498221e-07, |
|
"logits/generated": 0.20905160903930664, |
|
"logits/real": -0.19150669872760773, |
|
"logps/generated": -523.1032104492188, |
|
"logps/real": -445.8037109375, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/generated": -0.021403489634394646, |
|
"rewards/margins": 0.13283729553222656, |
|
"rewards/real": 0.11143380403518677, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.918149466192171e-07, |
|
"logits/generated": 0.20821765065193176, |
|
"logits/real": -0.18395385146141052, |
|
"logps/generated": -523.7911987304688, |
|
"logps/real": -444.244384765625, |
|
"loss": 0.1427, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/generated": -0.011090211570262909, |
|
"rewards/margins": 0.12333385646343231, |
|
"rewards/real": 0.1122436374425888, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.900355871886121e-07, |
|
"logits/generated": 0.2085740864276886, |
|
"logits/real": -0.18884018063545227, |
|
"logps/generated": -514.5075073242188, |
|
"logps/real": -455.51153564453125, |
|
"loss": 0.1418, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/generated": -0.008907720446586609, |
|
"rewards/margins": 0.13150587677955627, |
|
"rewards/real": 0.12259815633296967, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.882562277580071e-07, |
|
"logits/generated": 0.19080451130867004, |
|
"logits/real": -0.18134233355522156, |
|
"logps/generated": -535.5404052734375, |
|
"logps/real": -420.12054443359375, |
|
"loss": 0.142, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/generated": -0.01622314192354679, |
|
"rewards/margins": 0.13654197752475739, |
|
"rewards/real": 0.12031883001327515, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.864768683274021e-07, |
|
"logits/generated": 0.19419755041599274, |
|
"logits/real": -0.2099919319152832, |
|
"logps/generated": -506.61297607421875, |
|
"logps/real": -456.75201416015625, |
|
"loss": 0.142, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/generated": -0.005563541315495968, |
|
"rewards/margins": 0.13059118390083313, |
|
"rewards/real": 0.1250276416540146, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.846975088967971e-07, |
|
"logits/generated": 0.2110450267791748, |
|
"logits/real": -0.17780761420726776, |
|
"logps/generated": -504.53253173828125, |
|
"logps/real": -446.33013916015625, |
|
"loss": 0.1415, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/generated": -0.020040716975927353, |
|
"rewards/margins": 0.14582987129688263, |
|
"rewards/real": 0.12578915059566498, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.829181494661922e-07, |
|
"logits/generated": 0.19535906612873077, |
|
"logits/real": -0.23168042302131653, |
|
"logps/generated": -529.9983520507812, |
|
"logps/real": -478.4393615722656, |
|
"loss": 0.1408, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/generated": -0.01576526090502739, |
|
"rewards/margins": 0.15367695689201355, |
|
"rewards/real": 0.13791170716285706, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.811387900355872e-07, |
|
"logits/generated": 0.22139854729175568, |
|
"logits/real": -0.18635587394237518, |
|
"logps/generated": -528.14404296875, |
|
"logps/real": -444.3465270996094, |
|
"loss": 0.1397, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/generated": -0.016462773084640503, |
|
"rewards/margins": 0.1693098396062851, |
|
"rewards/real": 0.1528470814228058, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.793594306049822e-07, |
|
"logits/generated": 0.16860654950141907, |
|
"logits/real": -0.26878511905670166, |
|
"logps/generated": -514.0562744140625, |
|
"logps/real": -470.86279296875, |
|
"loss": 0.14, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -0.03441483899950981, |
|
"rewards/margins": 0.1793655902147293, |
|
"rewards/real": 0.1449507623910904, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.775800711743772e-07, |
|
"logits/generated": 0.2018643319606781, |
|
"logits/real": -0.22842028737068176, |
|
"logps/generated": -509.4456481933594, |
|
"logps/real": -471.5657653808594, |
|
"loss": 0.1407, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/generated": -0.02802978828549385, |
|
"rewards/margins": 0.16557763516902924, |
|
"rewards/real": 0.1375478357076645, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.758007117437722e-07, |
|
"logits/generated": 0.19222237169742584, |
|
"logits/real": -0.17731007933616638, |
|
"logps/generated": -525.8961181640625, |
|
"logps/real": -429.556396484375, |
|
"loss": 0.1391, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/generated": -0.026082757860422134, |
|
"rewards/margins": 0.18103452026844025, |
|
"rewards/real": 0.15495173633098602, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7402135231316723e-07, |
|
"logits/generated": 0.20608314871788025, |
|
"logits/real": -0.196997731924057, |
|
"logps/generated": -517.6672973632812, |
|
"logps/real": -437.429931640625, |
|
"loss": 0.1391, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/generated": -0.026546889916062355, |
|
"rewards/margins": 0.18050511181354523, |
|
"rewards/real": 0.15395823121070862, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.722419928825623e-07, |
|
"logits/generated": 0.18294331431388855, |
|
"logits/real": -0.19551487267017365, |
|
"logps/generated": -539.04052734375, |
|
"logps/real": -430.70318603515625, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -0.03303760290145874, |
|
"rewards/margins": 0.21606886386871338, |
|
"rewards/real": 0.18303126096725464, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7046263345195726e-07, |
|
"logits/generated": 0.18518558144569397, |
|
"logits/real": -0.2292046993970871, |
|
"logps/generated": -509.7601623535156, |
|
"logps/real": -452.63836669921875, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/generated": -0.01840498112142086, |
|
"rewards/margins": 0.18604739010334015, |
|
"rewards/real": 0.16764239966869354, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.686832740213523e-07, |
|
"logits/generated": 0.19558075070381165, |
|
"logits/real": -0.22530385851860046, |
|
"logps/generated": -520.8900756835938, |
|
"logps/real": -470.57257080078125, |
|
"loss": 0.1386, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/generated": -0.027829840779304504, |
|
"rewards/margins": 0.19147595763206482, |
|
"rewards/real": 0.1636461317539215, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.669039145907473e-07, |
|
"logits/generated": 0.19283290207386017, |
|
"logits/real": -0.19199484586715698, |
|
"logps/generated": -538.2701416015625, |
|
"logps/real": -440.3575134277344, |
|
"loss": 0.1376, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/generated": -0.03322785347700119, |
|
"rewards/margins": 0.21201416850090027, |
|
"rewards/real": 0.17878632247447968, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.651245551601423e-07, |
|
"logits/generated": 0.21194609999656677, |
|
"logits/real": -0.15735319256782532, |
|
"logps/generated": -533.7866821289062, |
|
"logps/real": -436.27215576171875, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -0.030345138162374496, |
|
"rewards/margins": 0.21164409816265106, |
|
"rewards/real": 0.18129894137382507, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.633451957295373e-07, |
|
"logits/generated": 0.1993105113506317, |
|
"logits/real": -0.20154225826263428, |
|
"logps/generated": -525.4606323242188, |
|
"logps/real": -439.7225646972656, |
|
"loss": 0.1375, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -0.019895639270544052, |
|
"rewards/margins": 0.20292837917804718, |
|
"rewards/real": 0.18303272128105164, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.615658362989324e-07, |
|
"logits/generated": 0.17055116593837738, |
|
"logits/real": -0.22003407776355743, |
|
"logps/generated": -537.1515502929688, |
|
"logps/real": -442.8724670410156, |
|
"loss": 0.1371, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/generated": -0.027774950489401817, |
|
"rewards/margins": 0.21557831764221191, |
|
"rewards/real": 0.18780337274074554, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.597864768683274e-07, |
|
"logits/generated": 0.1934186965227127, |
|
"logits/real": -0.16990774869918823, |
|
"logps/generated": -537.012451171875, |
|
"logps/real": -409.61419677734375, |
|
"loss": 0.1366, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/generated": -0.03637081757187843, |
|
"rewards/margins": 0.23026354610919952, |
|
"rewards/real": 0.19389274716377258, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.580071174377224e-07, |
|
"logits/generated": 0.1830703169107437, |
|
"logits/real": -0.22130803763866425, |
|
"logps/generated": -523.718994140625, |
|
"logps/real": -452.82818603515625, |
|
"loss": 0.1363, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.036309439688920975, |
|
"rewards/margins": 0.230547234416008, |
|
"rewards/real": 0.19423779845237732, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5622775800711743e-07, |
|
"logits/generated": 0.20362897217273712, |
|
"logits/real": -0.2156350165605545, |
|
"logps/generated": -535.4857788085938, |
|
"logps/real": -458.9664001464844, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/generated": -0.032396264374256134, |
|
"rewards/margins": 0.22477588057518005, |
|
"rewards/real": 0.1923796534538269, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5444839857651244e-07, |
|
"logits/generated": 0.18986502289772034, |
|
"logits/real": -0.2057947814464569, |
|
"logps/generated": -524.5361328125, |
|
"logps/real": -449.27734375, |
|
"loss": 0.135, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.039955057203769684, |
|
"rewards/margins": 0.25269564986228943, |
|
"rewards/real": 0.21274061501026154, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.526690391459074e-07, |
|
"logits/generated": 0.21750202775001526, |
|
"logits/real": -0.18711628019809723, |
|
"logps/generated": -533.2612915039062, |
|
"logps/real": -452.9281311035156, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.02985314093530178, |
|
"rewards/margins": 0.2417806088924408, |
|
"rewards/real": 0.21192745864391327, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5088967971530247e-07, |
|
"logits/generated": 0.1709345579147339, |
|
"logits/real": -0.2246251404285431, |
|
"logps/generated": -536.2772216796875, |
|
"logps/real": -442.66571044921875, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/generated": -0.03336368873715401, |
|
"rewards/margins": 0.24980218708515167, |
|
"rewards/real": 0.21643848717212677, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.491103202846975e-07, |
|
"logits/generated": 0.1961456835269928, |
|
"logits/real": -0.21015064418315887, |
|
"logps/generated": -516.0350341796875, |
|
"logps/real": -440.63934326171875, |
|
"loss": 0.1357, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/generated": -0.027470093220472336, |
|
"rewards/margins": 0.237370565533638, |
|
"rewards/real": 0.20990045368671417, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_logits/generated": 0.19990801811218262, |
|
"eval_logits/real": -0.20589132606983185, |
|
"eval_logps/generated": -521.6344604492188, |
|
"eval_logps/real": -455.5482482910156, |
|
"eval_loss": 0.13489334285259247, |
|
"eval_rewards/accuracies": 0.9054999947547913, |
|
"eval_rewards/generated": -0.03345545381307602, |
|
"eval_rewards/margins": 0.25203606486320496, |
|
"eval_rewards/real": 0.21858063340187073, |
|
"eval_runtime": 445.7957, |
|
"eval_samples_per_second": 4.486, |
|
"eval_steps_per_second": 1.122, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.473309608540925e-07, |
|
"logits/generated": 0.17669954895973206, |
|
"logits/real": -0.22454991936683655, |
|
"logps/generated": -525.3490600585938, |
|
"logps/real": -443.7996520996094, |
|
"loss": 0.1344, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/generated": -0.03830927610397339, |
|
"rewards/margins": 0.2601231336593628, |
|
"rewards/real": 0.2218138426542282, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.455516014234875e-07, |
|
"logits/generated": 0.21098542213439941, |
|
"logits/real": -0.16925989091396332, |
|
"logps/generated": -529.2811889648438, |
|
"logps/real": -446.4693908691406, |
|
"loss": 0.135, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/generated": -0.05097696930170059, |
|
"rewards/margins": 0.2641257643699646, |
|
"rewards/real": 0.213148832321167, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4377224199288253e-07, |
|
"logits/generated": 0.19224326312541962, |
|
"logits/real": -0.19469952583312988, |
|
"logps/generated": -522.6043090820312, |
|
"logps/real": -442.81683349609375, |
|
"loss": 0.1343, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.03429437428712845, |
|
"rewards/margins": 0.26410582661628723, |
|
"rewards/real": 0.22981147468090057, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4199288256227754e-07, |
|
"logits/generated": 0.1928466409444809, |
|
"logits/real": -0.20203690230846405, |
|
"logps/generated": -560.1980590820312, |
|
"logps/real": -428.7918395996094, |
|
"loss": 0.1333, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/generated": -0.035711534321308136, |
|
"rewards/margins": 0.2725156843662262, |
|
"rewards/real": 0.23680415749549866, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.402135231316726e-07, |
|
"logits/generated": 0.20243430137634277, |
|
"logits/real": -0.16769111156463623, |
|
"logps/generated": -504.7337951660156, |
|
"logps/real": -429.45672607421875, |
|
"loss": 0.1329, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.030326668173074722, |
|
"rewards/margins": 0.27559566497802734, |
|
"rewards/real": 0.2452690154314041, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3843416370106763e-07, |
|
"logits/generated": 0.2106173038482666, |
|
"logits/real": -0.22538356482982635, |
|
"logps/generated": -517.978759765625, |
|
"logps/real": -474.59735107421875, |
|
"loss": 0.1332, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.033002205193042755, |
|
"rewards/margins": 0.27451324462890625, |
|
"rewards/real": 0.2415110170841217, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3665480427046264e-07, |
|
"logits/generated": 0.21016332507133484, |
|
"logits/real": -0.2604514956474304, |
|
"logps/generated": -520.3605346679688, |
|
"logps/real": -490.40631103515625, |
|
"loss": 0.1336, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.044487275183200836, |
|
"rewards/margins": 0.2778502106666565, |
|
"rewards/real": 0.23336295783519745, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.348754448398576e-07, |
|
"logits/generated": 0.22852489352226257, |
|
"logits/real": -0.18696899712085724, |
|
"logps/generated": -523.6672973632812, |
|
"logps/real": -495.40057373046875, |
|
"loss": 0.133, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.03802080079913139, |
|
"rewards/margins": 0.2772233188152313, |
|
"rewards/real": 0.23920252919197083, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.330960854092526e-07, |
|
"logits/generated": 0.17415957152843475, |
|
"logits/real": -0.19011852145195007, |
|
"logps/generated": -513.3272705078125, |
|
"logps/real": -403.3451232910156, |
|
"loss": 0.1332, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.045419953763484955, |
|
"rewards/margins": 0.2882402539253235, |
|
"rewards/real": 0.24282029271125793, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3131672597864763e-07, |
|
"logits/generated": 0.2256070077419281, |
|
"logits/real": -0.17997749149799347, |
|
"logps/generated": -534.0467529296875, |
|
"logps/real": -483.41937255859375, |
|
"loss": 0.1314, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.049169979989528656, |
|
"rewards/margins": 0.3096661865711212, |
|
"rewards/real": 0.26049619913101196, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.295373665480427e-07, |
|
"logits/generated": 0.2115151435136795, |
|
"logits/real": -0.1779278963804245, |
|
"logps/generated": -524.0692749023438, |
|
"logps/real": -455.04046630859375, |
|
"loss": 0.1326, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/generated": -0.034146808087825775, |
|
"rewards/margins": 0.2888898253440857, |
|
"rewards/real": 0.2547430098056793, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.277580071174377e-07, |
|
"logits/generated": 0.19143882393836975, |
|
"logits/real": -0.1881864219903946, |
|
"logps/generated": -514.4266357421875, |
|
"logps/real": -449.707763671875, |
|
"loss": 0.1332, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/generated": -0.028750786557793617, |
|
"rewards/margins": 0.27422600984573364, |
|
"rewards/real": 0.24547524750232697, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2597864768683273e-07, |
|
"logits/generated": 0.14517921209335327, |
|
"logits/real": -0.23158793151378632, |
|
"logps/generated": -524.9425048828125, |
|
"logps/real": -423.65985107421875, |
|
"loss": 0.131, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.02722376585006714, |
|
"rewards/margins": 0.3096860945224762, |
|
"rewards/real": 0.28246229887008667, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2419928825622774e-07, |
|
"logits/generated": 0.19077686965465546, |
|
"logits/real": -0.206539124250412, |
|
"logps/generated": -510.04681396484375, |
|
"logps/real": -430.26513671875, |
|
"loss": 0.1325, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -0.028535595163702965, |
|
"rewards/margins": 0.28712087869644165, |
|
"rewards/real": 0.25858527421951294, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2241992882562276e-07, |
|
"logits/generated": 0.20149891078472137, |
|
"logits/real": -0.20975959300994873, |
|
"logps/generated": -516.36083984375, |
|
"logps/real": -456.06915283203125, |
|
"loss": 0.1315, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.03869970887899399, |
|
"rewards/margins": 0.3044109344482422, |
|
"rewards/real": 0.26571124792099, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2064056939501777e-07, |
|
"logits/generated": 0.20944401621818542, |
|
"logits/real": -0.20790605247020721, |
|
"logps/generated": -513.2515869140625, |
|
"logps/real": -470.3443298339844, |
|
"loss": 0.1333, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -0.038152649998664856, |
|
"rewards/margins": 0.2787010371685028, |
|
"rewards/real": 0.24054837226867676, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.188612099644128e-07, |
|
"logits/generated": 0.19982407987117767, |
|
"logits/real": -0.22643396258354187, |
|
"logps/generated": -531.8196411132812, |
|
"logps/real": -455.9195861816406, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.047723181545734406, |
|
"rewards/margins": 0.31384533643722534, |
|
"rewards/real": 0.26612216234207153, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.170818505338078e-07, |
|
"logits/generated": 0.18060657382011414, |
|
"logits/real": -0.2101529836654663, |
|
"logps/generated": -522.3262939453125, |
|
"logps/real": -430.184326171875, |
|
"loss": 0.1328, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/generated": -0.05798162892460823, |
|
"rewards/margins": 0.306007444858551, |
|
"rewards/real": 0.2480258047580719, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.153024911032028e-07, |
|
"logits/generated": 0.21487073600292206, |
|
"logits/real": -0.17722031474113464, |
|
"logps/generated": -508.89166259765625, |
|
"logps/real": -457.6886291503906, |
|
"loss": 0.132, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.033996812999248505, |
|
"rewards/margins": 0.30001968145370483, |
|
"rewards/real": 0.26602286100387573, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1352313167259783e-07, |
|
"logits/generated": 0.21322698891162872, |
|
"logits/real": -0.18131455779075623, |
|
"logps/generated": -532.228271484375, |
|
"logps/real": -413.98114013671875, |
|
"loss": 0.1313, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.039401743561029434, |
|
"rewards/margins": 0.30484768748283386, |
|
"rewards/real": 0.2654459774494171, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1174377224199284e-07, |
|
"logits/generated": 0.22022435069084167, |
|
"logits/real": -0.16830001771450043, |
|
"logps/generated": -524.0745849609375, |
|
"logps/real": -444.0736389160156, |
|
"loss": 0.1323, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/generated": -0.05002317577600479, |
|
"rewards/margins": 0.3070148527622223, |
|
"rewards/real": 0.2569916844367981, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.0996441281138786e-07, |
|
"logits/generated": 0.2300226241350174, |
|
"logits/real": -0.19325582683086395, |
|
"logps/generated": -515.1468505859375, |
|
"logps/real": -462.9017639160156, |
|
"loss": 0.1321, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/generated": -0.0361974723637104, |
|
"rewards/margins": 0.2977278232574463, |
|
"rewards/real": 0.261530339717865, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.081850533807829e-07, |
|
"logits/generated": 0.21782374382019043, |
|
"logits/real": -0.16392473876476288, |
|
"logps/generated": -521.0572509765625, |
|
"logps/real": -434.34857177734375, |
|
"loss": 0.1308, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.05005559325218201, |
|
"rewards/margins": 0.32770293951034546, |
|
"rewards/real": 0.27764734625816345, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0640569395017794e-07, |
|
"logits/generated": 0.20849747955799103, |
|
"logits/real": -0.19438520073890686, |
|
"logps/generated": -506.94970703125, |
|
"logps/real": -476.08905029296875, |
|
"loss": 0.1318, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.04421483352780342, |
|
"rewards/margins": 0.30931296944618225, |
|
"rewards/real": 0.2650981545448303, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0462633451957295e-07, |
|
"logits/generated": 0.2435588538646698, |
|
"logits/real": -0.20184215903282166, |
|
"logps/generated": -525.3304443359375, |
|
"logps/real": -493.5686950683594, |
|
"loss": 0.129, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.040996938943862915, |
|
"rewards/margins": 0.3434499204158783, |
|
"rewards/real": 0.3024529814720154, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0284697508896797e-07, |
|
"logits/generated": 0.21247819066047668, |
|
"logits/real": -0.2371760904788971, |
|
"logps/generated": -513.602783203125, |
|
"logps/real": -488.39349365234375, |
|
"loss": 0.1314, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.032181791961193085, |
|
"rewards/margins": 0.3022555410861969, |
|
"rewards/real": 0.27007371187210083, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0106761565836293e-07, |
|
"logits/generated": 0.21260352432727814, |
|
"logits/real": -0.18493767082691193, |
|
"logps/generated": -530.4390869140625, |
|
"logps/real": -451.08502197265625, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.04360523074865341, |
|
"rewards/margins": 0.3299635052680969, |
|
"rewards/real": 0.2863582968711853, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.9928825622775795e-07, |
|
"logits/generated": 0.17910648882389069, |
|
"logits/real": -0.21472099423408508, |
|
"logps/generated": -506.29864501953125, |
|
"logps/real": -440.6806640625, |
|
"loss": 0.1298, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.049231477081775665, |
|
"rewards/margins": 0.3400951623916626, |
|
"rewards/real": 0.29086369276046753, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.97508896797153e-07, |
|
"logits/generated": 0.2306959182024002, |
|
"logits/real": -0.22861719131469727, |
|
"logps/generated": -518.2430419921875, |
|
"logps/real": -484.080322265625, |
|
"loss": 0.1309, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/generated": -0.04008597880601883, |
|
"rewards/margins": 0.31413963437080383, |
|
"rewards/real": 0.2740536630153656, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9572953736654803e-07, |
|
"logits/generated": 0.19337113201618195, |
|
"logits/real": -0.1461922973394394, |
|
"logps/generated": -520.7548828125, |
|
"logps/real": -412.48834228515625, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.04457572102546692, |
|
"rewards/margins": 0.33213964104652405, |
|
"rewards/real": 0.28756392002105713, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9395017793594304e-07, |
|
"logits/generated": 0.19213825464248657, |
|
"logits/real": -0.17207488417625427, |
|
"logps/generated": -508.79742431640625, |
|
"logps/real": -431.6582946777344, |
|
"loss": 0.1309, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/generated": -0.03421945124864578, |
|
"rewards/margins": 0.315326452255249, |
|
"rewards/real": 0.2811070382595062, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9217081850533806e-07, |
|
"logits/generated": 0.20467457175254822, |
|
"logits/real": -0.18655052781105042, |
|
"logps/generated": -534.2474365234375, |
|
"logps/real": -451.9898986816406, |
|
"loss": 0.1296, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.045324601233005524, |
|
"rewards/margins": 0.33579733967781067, |
|
"rewards/real": 0.29047271609306335, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.9039145907473307e-07, |
|
"logits/generated": 0.1963758021593094, |
|
"logits/real": -0.24852268397808075, |
|
"logps/generated": -519.084716796875, |
|
"logps/real": -475.10467529296875, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.04257890582084656, |
|
"rewards/margins": 0.3550090789794922, |
|
"rewards/real": 0.31243017315864563, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.886120996441281e-07, |
|
"logits/generated": 0.18512029945850372, |
|
"logits/real": -0.20866036415100098, |
|
"logps/generated": -530.0177612304688, |
|
"logps/real": -454.27496337890625, |
|
"loss": 0.1285, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.03352942317724228, |
|
"rewards/margins": 0.34436360001564026, |
|
"rewards/real": 0.310834139585495, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.8683274021352315e-07, |
|
"logits/generated": 0.20876750349998474, |
|
"logits/real": -0.23259572684764862, |
|
"logps/generated": -542.6410522460938, |
|
"logps/real": -462.28558349609375, |
|
"loss": 0.1296, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.028329987078905106, |
|
"rewards/margins": 0.32887953519821167, |
|
"rewards/real": 0.30054956674575806, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8505338078291817e-07, |
|
"logits/generated": 0.20735135674476624, |
|
"logits/real": -0.18274542689323425, |
|
"logps/generated": -531.2545776367188, |
|
"logps/real": -445.79254150390625, |
|
"loss": 0.1297, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.049115560948848724, |
|
"rewards/margins": 0.34622785449028015, |
|
"rewards/real": 0.29711228609085083, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8327402135231313e-07, |
|
"logits/generated": 0.22061915695667267, |
|
"logits/real": -0.1666678935289383, |
|
"logps/generated": -512.0516967773438, |
|
"logps/real": -454.59765625, |
|
"loss": 0.1287, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.031470268964767456, |
|
"rewards/margins": 0.3416782021522522, |
|
"rewards/real": 0.31020793318748474, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8149466192170814e-07, |
|
"logits/generated": 0.20676842331886292, |
|
"logits/real": -0.22948101162910461, |
|
"logps/generated": -517.1962890625, |
|
"logps/real": -462.365478515625, |
|
"loss": 0.1289, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.05391014367341995, |
|
"rewards/margins": 0.35990110039711, |
|
"rewards/real": 0.30599093437194824, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7971530249110316e-07, |
|
"logits/generated": 0.21802881360054016, |
|
"logits/real": -0.20144328474998474, |
|
"logps/generated": -512.7889404296875, |
|
"logps/real": -446.5421447753906, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.04316414147615433, |
|
"rewards/margins": 0.35774049162864685, |
|
"rewards/real": 0.3145763874053955, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7793594306049817e-07, |
|
"logits/generated": 0.1871260106563568, |
|
"logits/real": -0.18488526344299316, |
|
"logps/generated": -512.2315063476562, |
|
"logps/real": -420.43865966796875, |
|
"loss": 0.1275, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.0390264093875885, |
|
"rewards/margins": 0.3577350676059723, |
|
"rewards/real": 0.3187086582183838, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7615658362989324e-07, |
|
"logits/generated": 0.236494779586792, |
|
"logits/real": -0.15096931159496307, |
|
"logps/generated": -507.55548095703125, |
|
"logps/real": -445.41046142578125, |
|
"loss": 0.1275, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.048680149018764496, |
|
"rewards/margins": 0.3694431185722351, |
|
"rewards/real": 0.3207629919052124, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7437722419928825e-07, |
|
"logits/generated": 0.22562535107135773, |
|
"logits/real": -0.19512876868247986, |
|
"logps/generated": -531.6770629882812, |
|
"logps/real": -446.9190979003906, |
|
"loss": 0.1277, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.04859489947557449, |
|
"rewards/margins": 0.3696749806404114, |
|
"rewards/real": 0.3210800588130951, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7259786476868327e-07, |
|
"logits/generated": 0.2176874577999115, |
|
"logits/real": -0.1850893795490265, |
|
"logps/generated": -514.0935668945312, |
|
"logps/real": -441.65728759765625, |
|
"loss": 0.1298, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/generated": -0.03823425993323326, |
|
"rewards/margins": 0.3405149579048157, |
|
"rewards/real": 0.3022806644439697, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.708185053380783e-07, |
|
"logits/generated": 0.24159689247608185, |
|
"logits/real": -0.2040737122297287, |
|
"logps/generated": -501.77117919921875, |
|
"logps/real": -471.8893127441406, |
|
"loss": 0.1273, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.048200882971286774, |
|
"rewards/margins": 0.37571340799331665, |
|
"rewards/real": 0.3275125324726105, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.690391459074733e-07, |
|
"logits/generated": 0.2472068816423416, |
|
"logits/real": -0.1870822012424469, |
|
"logps/generated": -508.8600158691406, |
|
"logps/real": -473.0227966308594, |
|
"loss": 0.1285, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.04576011374592781, |
|
"rewards/margins": 0.36241215467453003, |
|
"rewards/real": 0.3166520297527313, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6725978647686836e-07, |
|
"logits/generated": 0.2242005616426468, |
|
"logits/real": -0.22695569694042206, |
|
"logps/generated": -510.89080810546875, |
|
"logps/real": -479.42108154296875, |
|
"loss": 0.1283, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.049571048468351364, |
|
"rewards/margins": 0.36083894968032837, |
|
"rewards/real": 0.3112679123878479, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.654804270462633e-07, |
|
"logits/generated": 0.21036097407341003, |
|
"logits/real": -0.1436823010444641, |
|
"logps/generated": -523.9207153320312, |
|
"logps/real": -429.45458984375, |
|
"loss": 0.1265, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.06013381481170654, |
|
"rewards/margins": 0.3962857127189636, |
|
"rewards/real": 0.3361518979072571, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6370106761565834e-07, |
|
"logits/generated": 0.24738578498363495, |
|
"logits/real": -0.13808973133563995, |
|
"logps/generated": -509.6529235839844, |
|
"logps/real": -439.38531494140625, |
|
"loss": 0.128, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/generated": -0.0440841019153595, |
|
"rewards/margins": 0.36679965257644653, |
|
"rewards/real": 0.32271555066108704, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6192170818505336e-07, |
|
"logits/generated": 0.22692394256591797, |
|
"logits/real": -0.17329858243465424, |
|
"logps/generated": -526.3966064453125, |
|
"logps/real": -431.3544921875, |
|
"loss": 0.125, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.05675189942121506, |
|
"rewards/margins": 0.4089035987854004, |
|
"rewards/real": 0.35215169191360474, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6014234875444837e-07, |
|
"logits/generated": 0.24121804535388947, |
|
"logits/real": -0.20156726241111755, |
|
"logps/generated": -524.36572265625, |
|
"logps/real": -451.2276306152344, |
|
"loss": 0.1267, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.06758119910955429, |
|
"rewards/margins": 0.39545828104019165, |
|
"rewards/real": 0.32787710428237915, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.583629893238434e-07, |
|
"logits/generated": 0.22711153328418732, |
|
"logits/real": -0.16746626794338226, |
|
"logps/generated": -536.3843994140625, |
|
"logps/real": -448.345703125, |
|
"loss": 0.1257, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.04590682312846184, |
|
"rewards/margins": 0.38996079564094543, |
|
"rewards/real": 0.3440540134906769, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.565836298932384e-07, |
|
"logits/generated": 0.19922706484794617, |
|
"logits/real": -0.21461109817028046, |
|
"logps/generated": -515.5289306640625, |
|
"logps/real": -459.3103942871094, |
|
"loss": 0.1278, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.05231200531125069, |
|
"rewards/margins": 0.3805178105831146, |
|
"rewards/real": 0.32820582389831543, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5480427046263347e-07, |
|
"logits/generated": 0.22783759236335754, |
|
"logits/real": -0.20611587166786194, |
|
"logps/generated": -521.995849609375, |
|
"logps/real": -476.81585693359375, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.04818415641784668, |
|
"rewards/margins": 0.3822416663169861, |
|
"rewards/real": 0.3340575098991394, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.530249110320285e-07, |
|
"logits/generated": 0.2178882360458374, |
|
"logits/real": -0.25764960050582886, |
|
"logps/generated": -535.8800659179688, |
|
"logps/real": -476.8208923339844, |
|
"loss": 0.1274, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.0434628501534462, |
|
"rewards/margins": 0.370759516954422, |
|
"rewards/real": 0.3272966742515564, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.512455516014235e-07, |
|
"logits/generated": 0.18462668359279633, |
|
"logits/real": -0.2151033580303192, |
|
"logps/generated": -523.6564331054688, |
|
"logps/real": -465.83929443359375, |
|
"loss": 0.1281, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.0575849823653698, |
|
"rewards/margins": 0.3719588816165924, |
|
"rewards/real": 0.3143738806247711, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4946619217081846e-07, |
|
"logits/generated": 0.21648356318473816, |
|
"logits/real": -0.22641988098621368, |
|
"logps/generated": -503.7261657714844, |
|
"logps/real": -470.4150390625, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/generated": -0.035186249762773514, |
|
"rewards/margins": 0.36030828952789307, |
|
"rewards/real": 0.32512202858924866, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4768683274021347e-07, |
|
"logits/generated": 0.22231841087341309, |
|
"logits/real": -0.2200026959180832, |
|
"logps/generated": -551.32373046875, |
|
"logps/real": -477.9534606933594, |
|
"loss": 0.1272, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.07955564558506012, |
|
"rewards/margins": 0.3987639546394348, |
|
"rewards/real": 0.3192083239555359, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.459074733096085e-07, |
|
"logits/generated": 0.23221631348133087, |
|
"logits/real": -0.17455917596817017, |
|
"logps/generated": -522.5137329101562, |
|
"logps/real": -466.8882751464844, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.07725702226161957, |
|
"rewards/margins": 0.4179654121398926, |
|
"rewards/real": 0.3407083749771118, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4412811387900355e-07, |
|
"logits/generated": 0.24756267666816711, |
|
"logits/real": -0.20915552973747253, |
|
"logps/generated": -514.363037109375, |
|
"logps/real": -500.83721923828125, |
|
"loss": 0.1266, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.05856259539723396, |
|
"rewards/margins": 0.3967771828174591, |
|
"rewards/real": 0.33821457624435425, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4234875444839857e-07, |
|
"logits/generated": 0.21352288126945496, |
|
"logits/real": -0.20619618892669678, |
|
"logps/generated": -510.033203125, |
|
"logps/real": -440.34423828125, |
|
"loss": 0.1261, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.03017326630651951, |
|
"rewards/margins": 0.3813417851924896, |
|
"rewards/real": 0.35116851329803467, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_logits/generated": 0.22333942353725433, |
|
"eval_logits/real": -0.18551431596279144, |
|
"eval_logps/generated": -521.822509765625, |
|
"eval_logps/real": -454.307861328125, |
|
"eval_loss": 0.12625987827777863, |
|
"eval_rewards/accuracies": 0.9365000128746033, |
|
"eval_rewards/generated": -0.0522722452878952, |
|
"eval_rewards/margins": 0.3948952555656433, |
|
"eval_rewards/real": 0.3426230549812317, |
|
"eval_runtime": 467.1537, |
|
"eval_samples_per_second": 4.281, |
|
"eval_steps_per_second": 1.07, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.405693950177936e-07, |
|
"logits/generated": 0.20984432101249695, |
|
"logits/real": -0.19126197695732117, |
|
"logps/generated": -518.5199584960938, |
|
"logps/real": -439.51324462890625, |
|
"loss": 0.1265, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.05940445512533188, |
|
"rewards/margins": 0.4006901681423187, |
|
"rewards/real": 0.34128570556640625, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.387900355871886e-07, |
|
"logits/generated": 0.23297259211540222, |
|
"logits/real": -0.1324882060289383, |
|
"logps/generated": -533.5361938476562, |
|
"logps/real": -436.12335205078125, |
|
"loss": 0.1264, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.044547099620103836, |
|
"rewards/margins": 0.39039477705955505, |
|
"rewards/real": 0.3458476662635803, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.370106761565836e-07, |
|
"logits/generated": 0.21869917213916779, |
|
"logits/real": -0.16005317866802216, |
|
"logps/generated": -505.5008239746094, |
|
"logps/real": -440.06341552734375, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.03350327908992767, |
|
"rewards/margins": 0.3943425714969635, |
|
"rewards/real": 0.360839307308197, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.352313167259787e-07, |
|
"logits/generated": 0.20472922921180725, |
|
"logits/real": -0.17530474066734314, |
|
"logps/generated": -510.0504455566406, |
|
"logps/real": -431.83587646484375, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.05266762897372246, |
|
"rewards/margins": 0.3853222131729126, |
|
"rewards/real": 0.33265453577041626, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.334519572953737e-07, |
|
"logits/generated": 0.2204224169254303, |
|
"logits/real": -0.1700909435749054, |
|
"logps/generated": -510.51141357421875, |
|
"logps/real": -437.6064453125, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.03798319026827812, |
|
"rewards/margins": 0.40347009897232056, |
|
"rewards/real": 0.36548691987991333, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3167259786476865e-07, |
|
"logits/generated": 0.21306562423706055, |
|
"logits/real": -0.19046835601329803, |
|
"logps/generated": -513.0300903320312, |
|
"logps/real": -442.71124267578125, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.03914360702037811, |
|
"rewards/margins": 0.38992029428482056, |
|
"rewards/real": 0.35077667236328125, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2989323843416367e-07, |
|
"logits/generated": 0.21032293140888214, |
|
"logits/real": -0.21575979888439178, |
|
"logps/generated": -509.75128173828125, |
|
"logps/real": -469.04351806640625, |
|
"loss": 0.1265, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.06580056995153427, |
|
"rewards/margins": 0.40863609313964844, |
|
"rewards/real": 0.3428354859352112, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.281138790035587e-07, |
|
"logits/generated": 0.24854688346385956, |
|
"logits/real": -0.14409920573234558, |
|
"logps/generated": -514.2542724609375, |
|
"logps/real": -451.86846923828125, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.02604399248957634, |
|
"rewards/margins": 0.3732038140296936, |
|
"rewards/real": 0.34715980291366577, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.263345195729537e-07, |
|
"logits/generated": 0.1697693169116974, |
|
"logits/real": -0.22252459824085236, |
|
"logps/generated": -515.5403442382812, |
|
"logps/real": -433.65496826171875, |
|
"loss": 0.1261, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.06866597384214401, |
|
"rewards/margins": 0.40779757499694824, |
|
"rewards/real": 0.3391316533088684, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.245551601423487e-07, |
|
"logits/generated": 0.24601443111896515, |
|
"logits/real": -0.2113780677318573, |
|
"logps/generated": -532.38232421875, |
|
"logps/real": -487.1578063964844, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.06285814940929413, |
|
"rewards/margins": 0.40850821137428284, |
|
"rewards/real": 0.3456500470638275, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.227758007117438e-07, |
|
"logits/generated": 0.21627071499824524, |
|
"logits/real": -0.16779956221580505, |
|
"logps/generated": -538.037109375, |
|
"logps/real": -454.26580810546875, |
|
"loss": 0.1257, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.07498253881931305, |
|
"rewards/margins": 0.4246435761451721, |
|
"rewards/real": 0.34966105222702026, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.209964412811388e-07, |
|
"logits/generated": 0.22846777737140656, |
|
"logits/real": -0.23057064414024353, |
|
"logps/generated": -534.3760986328125, |
|
"logps/real": -467.74365234375, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.05258546024560928, |
|
"rewards/margins": 0.4155358672142029, |
|
"rewards/real": 0.3629503846168518, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.192170818505338e-07, |
|
"logits/generated": 0.22176751494407654, |
|
"logits/real": -0.2189159095287323, |
|
"logps/generated": -512.0242309570312, |
|
"logps/real": -471.23724365234375, |
|
"loss": 0.126, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.02907324768602848, |
|
"rewards/margins": 0.3788103461265564, |
|
"rewards/real": 0.34973710775375366, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.174377224199288e-07, |
|
"logits/generated": 0.22437652945518494, |
|
"logits/real": -0.14511239528656006, |
|
"logps/generated": -529.4261474609375, |
|
"logps/real": -408.4437561035156, |
|
"loss": 0.125, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.09781909734010696, |
|
"rewards/margins": 0.45350271463394165, |
|
"rewards/real": 0.3556835353374481, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1565836298932384e-07, |
|
"logits/generated": 0.2222731113433838, |
|
"logits/real": -0.20278367400169373, |
|
"logps/generated": -516.1197509765625, |
|
"logps/real": -449.39227294921875, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.05577481910586357, |
|
"rewards/margins": 0.41228675842285156, |
|
"rewards/real": 0.3565119206905365, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.138790035587188e-07, |
|
"logits/generated": 0.19292616844177246, |
|
"logits/real": -0.1925380676984787, |
|
"logps/generated": -521.6808471679688, |
|
"logps/real": -438.78778076171875, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.060262132436037064, |
|
"rewards/margins": 0.4185408055782318, |
|
"rewards/real": 0.35827872157096863, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1209964412811387e-07, |
|
"logits/generated": 0.23866550624370575, |
|
"logits/real": -0.1537286341190338, |
|
"logps/generated": -510.27069091796875, |
|
"logps/real": -429.9480895996094, |
|
"loss": 0.1261, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.056734926998615265, |
|
"rewards/margins": 0.4073144495487213, |
|
"rewards/real": 0.35057950019836426, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.103202846975089e-07, |
|
"logits/generated": 0.22526803612709045, |
|
"logits/real": -0.2068435698747635, |
|
"logps/generated": -523.3880615234375, |
|
"logps/real": -460.9934997558594, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.05335558205842972, |
|
"rewards/margins": 0.41251659393310547, |
|
"rewards/real": 0.35916104912757874, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.085409252669039e-07, |
|
"logits/generated": 0.2166483849287033, |
|
"logits/real": -0.16389943659305573, |
|
"logps/generated": -533.65478515625, |
|
"logps/real": -447.15252685546875, |
|
"loss": 0.1273, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/generated": -0.04129910469055176, |
|
"rewards/margins": 0.37630194425582886, |
|
"rewards/real": 0.3350028395652771, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.067615658362989e-07, |
|
"logits/generated": 0.22569017112255096, |
|
"logits/real": -0.1831541508436203, |
|
"logps/generated": -510.294921875, |
|
"logps/real": -446.5750427246094, |
|
"loss": 0.1271, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.0428488552570343, |
|
"rewards/margins": 0.3756479024887085, |
|
"rewards/real": 0.3327990472316742, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.049822064056939e-07, |
|
"logits/generated": 0.23136644065380096, |
|
"logits/real": -0.23328185081481934, |
|
"logps/generated": -552.6895751953125, |
|
"logps/real": -474.44781494140625, |
|
"loss": 0.1242, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.07789872586727142, |
|
"rewards/margins": 0.44054466485977173, |
|
"rewards/real": 0.3626458942890167, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.03202846975089e-07, |
|
"logits/generated": 0.2204451858997345, |
|
"logits/real": -0.19231361150741577, |
|
"logps/generated": -524.7732543945312, |
|
"logps/real": -432.150390625, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.05935334041714668, |
|
"rewards/margins": 0.4120023250579834, |
|
"rewards/real": 0.3526490032672882, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.01423487544484e-07, |
|
"logits/generated": 0.2073640078306198, |
|
"logits/real": -0.21677975356578827, |
|
"logps/generated": -534.759765625, |
|
"logps/real": -462.5953674316406, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.0705971047282219, |
|
"rewards/margins": 0.4193173944950104, |
|
"rewards/real": 0.3487202823162079, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.99644128113879e-07, |
|
"logits/generated": 0.23698802292346954, |
|
"logits/real": -0.12353632599115372, |
|
"logps/generated": -526.7183227539062, |
|
"logps/real": -405.3436584472656, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.03835119307041168, |
|
"rewards/margins": 0.4056209623813629, |
|
"rewards/real": 0.3672698140144348, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.97864768683274e-07, |
|
"logits/generated": 0.21073408424854279, |
|
"logits/real": -0.13484127819538116, |
|
"logps/generated": -521.523193359375, |
|
"logps/real": -420.841796875, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.0601874478161335, |
|
"rewards/margins": 0.44554439187049866, |
|
"rewards/real": 0.38535696268081665, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.96085409252669e-07, |
|
"logits/generated": 0.2359226495027542, |
|
"logits/real": -0.139388769865036, |
|
"logps/generated": -498.6105041503906, |
|
"logps/real": -436.9283142089844, |
|
"loss": 0.1267, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.06179014965891838, |
|
"rewards/margins": 0.4049047827720642, |
|
"rewards/real": 0.3431146442890167, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.94306049822064e-07, |
|
"logits/generated": 0.22477808594703674, |
|
"logits/real": -0.18658998608589172, |
|
"logps/generated": -539.577392578125, |
|
"logps/real": -436.072998046875, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.06574585288763046, |
|
"rewards/margins": 0.4261474609375, |
|
"rewards/real": 0.36040163040161133, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.92526690391459e-07, |
|
"logits/generated": 0.24180345237255096, |
|
"logits/real": -0.10471512377262115, |
|
"logps/generated": -533.6973266601562, |
|
"logps/real": -423.3241271972656, |
|
"loss": 0.1236, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.08005066215991974, |
|
"rewards/margins": 0.45733848214149475, |
|
"rewards/real": 0.3772878050804138, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.907473309608541e-07, |
|
"logits/generated": 0.21116897463798523, |
|
"logits/real": -0.19643299281597137, |
|
"logps/generated": -534.4854125976562, |
|
"logps/real": -429.40313720703125, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.03764469176530838, |
|
"rewards/margins": 0.40410351753234863, |
|
"rewards/real": 0.36645883321762085, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.889679715302491e-07, |
|
"logits/generated": 0.2296733409166336, |
|
"logits/real": -0.18100515007972717, |
|
"logps/generated": -523.2075805664062, |
|
"logps/real": -454.98931884765625, |
|
"loss": 0.1249, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.06059467792510986, |
|
"rewards/margins": 0.42317479848861694, |
|
"rewards/real": 0.3625801205635071, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.871886120996441e-07, |
|
"logits/generated": 0.269220769405365, |
|
"logits/real": -0.13889244198799133, |
|
"logps/generated": -497.45379638671875, |
|
"logps/real": -437.2528381347656, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.04436558485031128, |
|
"rewards/margins": 0.41556286811828613, |
|
"rewards/real": 0.37119728326797485, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8540925266903914e-07, |
|
"logits/generated": 0.21270576119422913, |
|
"logits/real": -0.18475386500358582, |
|
"logps/generated": -528.8832397460938, |
|
"logps/real": -440.03057861328125, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.0362856462597847, |
|
"rewards/margins": 0.42006000876426697, |
|
"rewards/real": 0.38377436995506287, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8362989323843415e-07, |
|
"logits/generated": 0.23695051670074463, |
|
"logits/real": -0.18097969889640808, |
|
"logps/generated": -525.389404296875, |
|
"logps/real": -460.3702697753906, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.06530066579580307, |
|
"rewards/margins": 0.4440096318721771, |
|
"rewards/real": 0.37870892882347107, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.818505338078292e-07, |
|
"logits/generated": 0.24917741119861603, |
|
"logits/real": -0.16412577033042908, |
|
"logps/generated": -509.0914001464844, |
|
"logps/real": -463.69830322265625, |
|
"loss": 0.1257, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.0415273942053318, |
|
"rewards/margins": 0.3965379595756531, |
|
"rewards/real": 0.3550105392932892, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.800711743772242e-07, |
|
"logits/generated": 0.25012919306755066, |
|
"logits/real": -0.1794908493757248, |
|
"logps/generated": -524.6221923828125, |
|
"logps/real": -469.295166015625, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.06365501135587692, |
|
"rewards/margins": 0.42465314269065857, |
|
"rewards/real": 0.36099809408187866, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.782918149466192e-07, |
|
"logits/generated": 0.2226579487323761, |
|
"logits/real": -0.19419345259666443, |
|
"logps/generated": -530.0787963867188, |
|
"logps/real": -469.27606201171875, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.051626600325107574, |
|
"rewards/margins": 0.4078029990196228, |
|
"rewards/real": 0.3561764359474182, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.765124555160142e-07, |
|
"logits/generated": 0.1991543471813202, |
|
"logits/real": -0.21607446670532227, |
|
"logps/generated": -518.56005859375, |
|
"logps/real": -447.11962890625, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.05867942050099373, |
|
"rewards/margins": 0.42213720083236694, |
|
"rewards/real": 0.3634577691555023, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.747330960854092e-07, |
|
"logits/generated": 0.21239633858203888, |
|
"logits/real": -0.15555164217948914, |
|
"logps/generated": -504.6756286621094, |
|
"logps/real": -419.6802673339844, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.03540899232029915, |
|
"rewards/margins": 0.4143758714199066, |
|
"rewards/real": 0.3789668679237366, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7295373665480424e-07, |
|
"logits/generated": 0.23810231685638428, |
|
"logits/real": -0.17116566002368927, |
|
"logps/generated": -507.6982421875, |
|
"logps/real": -465.0140686035156, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.035895101726055145, |
|
"rewards/margins": 0.40210455656051636, |
|
"rewards/real": 0.3662094473838806, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.711743772241993e-07, |
|
"logits/generated": 0.23017612099647522, |
|
"logits/real": -0.19639410078525543, |
|
"logps/generated": -513.0866088867188, |
|
"logps/real": -443.845947265625, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.06835327297449112, |
|
"rewards/margins": 0.44039884209632874, |
|
"rewards/real": 0.3720455765724182, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.693950177935943e-07, |
|
"logits/generated": 0.2244982272386551, |
|
"logits/real": -0.19841459393501282, |
|
"logps/generated": -542.2571411132812, |
|
"logps/real": -450.25, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.07537367194890976, |
|
"rewards/margins": 0.42732158303260803, |
|
"rewards/real": 0.35194793343544006, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6761565836298933e-07, |
|
"logits/generated": 0.2113502472639084, |
|
"logits/real": -0.18353787064552307, |
|
"logps/generated": -529.5034790039062, |
|
"logps/real": -447.72357177734375, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.06783580780029297, |
|
"rewards/margins": 0.4514337480068207, |
|
"rewards/real": 0.3835979402065277, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6583629893238435e-07, |
|
"logits/generated": 0.23711976408958435, |
|
"logits/real": -0.16799500584602356, |
|
"logps/generated": -506.9942932128906, |
|
"logps/real": -472.8714294433594, |
|
"loss": 0.124, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.043571460992097855, |
|
"rewards/margins": 0.4209679961204529, |
|
"rewards/real": 0.3773965537548065, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6405693950177936e-07, |
|
"logits/generated": 0.21126489341259003, |
|
"logits/real": -0.19328458607196808, |
|
"logps/generated": -506.70159912109375, |
|
"logps/real": -458.549560546875, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.05301676318049431, |
|
"rewards/margins": 0.42876943945884705, |
|
"rewards/real": 0.375752717256546, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.622775800711743e-07, |
|
"logits/generated": 0.23474998772144318, |
|
"logits/real": -0.1575848013162613, |
|
"logps/generated": -540.1917114257812, |
|
"logps/real": -445.42156982421875, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.07524801790714264, |
|
"rewards/margins": 0.4695201516151428, |
|
"rewards/real": 0.3942721486091614, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6049822064056934e-07, |
|
"logits/generated": 0.20971938967704773, |
|
"logits/real": -0.18155238032341003, |
|
"logps/generated": -521.19775390625, |
|
"logps/real": -440.2428283691406, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.05046185106039047, |
|
"rewards/margins": 0.4103214144706726, |
|
"rewards/real": 0.3598595857620239, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.587188612099644e-07, |
|
"logits/generated": 0.2089037150144577, |
|
"logits/real": -0.15210075676441193, |
|
"logps/generated": -532.7290649414062, |
|
"logps/real": -409.13787841796875, |
|
"loss": 0.1242, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.06394266337156296, |
|
"rewards/margins": 0.4383109509944916, |
|
"rewards/real": 0.37436825037002563, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.569395017793594e-07, |
|
"logits/generated": 0.2460833042860031, |
|
"logits/real": -0.13932695984840393, |
|
"logps/generated": -526.330322265625, |
|
"logps/real": -448.05377197265625, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.07719500362873077, |
|
"rewards/margins": 0.4495556950569153, |
|
"rewards/real": 0.3723606467247009, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5516014234875444e-07, |
|
"logits/generated": 0.20458194613456726, |
|
"logits/real": -0.17320473492145538, |
|
"logps/generated": -524.3390502929688, |
|
"logps/real": -426.5902404785156, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.0741974413394928, |
|
"rewards/margins": 0.46985602378845215, |
|
"rewards/real": 0.39565858244895935, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5338078291814945e-07, |
|
"logits/generated": 0.26021358370780945, |
|
"logits/real": -0.15163478255271912, |
|
"logps/generated": -508.49951171875, |
|
"logps/real": -436.19061279296875, |
|
"loss": 0.1249, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.051833074539899826, |
|
"rewards/margins": 0.4227888584136963, |
|
"rewards/real": 0.37095582485198975, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5160142348754447e-07, |
|
"logits/generated": 0.22410514950752258, |
|
"logits/real": -0.19840450584888458, |
|
"logps/generated": -527.9317016601562, |
|
"logps/real": -448.7506408691406, |
|
"loss": 0.1231, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.05459089204668999, |
|
"rewards/margins": 0.44405126571655273, |
|
"rewards/real": 0.38946038484573364, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.498220640569395e-07, |
|
"logits/generated": 0.21920056641101837, |
|
"logits/real": -0.1736256182193756, |
|
"logps/generated": -534.7623291015625, |
|
"logps/real": -443.5054626464844, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.06553854048252106, |
|
"rewards/margins": 0.4489002823829651, |
|
"rewards/real": 0.3833617568016052, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.480427046263345e-07, |
|
"logits/generated": 0.21249528229236603, |
|
"logits/real": -0.14947716891765594, |
|
"logps/generated": -537.5214233398438, |
|
"logps/real": -431.114013671875, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.06390459835529327, |
|
"rewards/margins": 0.4571428894996643, |
|
"rewards/real": 0.3932383060455322, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.462633451957295e-07, |
|
"logits/generated": 0.22841641306877136, |
|
"logits/real": -0.1430717259645462, |
|
"logps/generated": -520.1146240234375, |
|
"logps/real": -417.46124267578125, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.051337797194719315, |
|
"rewards/margins": 0.4231833815574646, |
|
"rewards/real": 0.37184563279151917, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.444839857651246e-07, |
|
"logits/generated": 0.2303360402584076, |
|
"logits/real": -0.21451357007026672, |
|
"logps/generated": -516.4577026367188, |
|
"logps/real": -471.78485107421875, |
|
"loss": 0.1254, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.04709147289395332, |
|
"rewards/margins": 0.4153119921684265, |
|
"rewards/real": 0.3682205379009247, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.427046263345196e-07, |
|
"logits/generated": 0.24512319266796112, |
|
"logits/real": -0.1643613874912262, |
|
"logps/generated": -521.9609985351562, |
|
"logps/real": -444.45794677734375, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.07158510386943817, |
|
"rewards/margins": 0.45351576805114746, |
|
"rewards/real": 0.3819306790828705, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4092526690391455e-07, |
|
"logits/generated": 0.24102430045604706, |
|
"logits/real": -0.13085922598838806, |
|
"logps/generated": -524.731689453125, |
|
"logps/real": -430.0142517089844, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.0607861764729023, |
|
"rewards/margins": 0.42205628752708435, |
|
"rewards/real": 0.36127012968063354, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.391459074733096e-07, |
|
"logits/generated": 0.23209047317504883, |
|
"logits/real": -0.15809141099452972, |
|
"logps/generated": -519.98095703125, |
|
"logps/real": -441.5606994628906, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.06819422543048859, |
|
"rewards/margins": 0.45197123289108276, |
|
"rewards/real": 0.383776992559433, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.373665480427046e-07, |
|
"logits/generated": 0.25702834129333496, |
|
"logits/real": -0.1923447549343109, |
|
"logps/generated": -512.0235595703125, |
|
"logps/real": -448.59381103515625, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.0629502683877945, |
|
"rewards/margins": 0.449089914560318, |
|
"rewards/real": 0.3861396312713623, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3558718861209965e-07, |
|
"logits/generated": 0.20865292847156525, |
|
"logits/real": -0.18462467193603516, |
|
"logps/generated": -508.04833984375, |
|
"logps/real": -440.5543518066406, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/generated": -0.025387108325958252, |
|
"rewards/margins": 0.4082257151603699, |
|
"rewards/real": 0.3828386664390564, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/generated": 0.22340521216392517, |
|
"eval_logits/real": -0.18402215838432312, |
|
"eval_logps/generated": -521.901123046875, |
|
"eval_logps/real": -453.87786865234375, |
|
"eval_loss": 0.12333940714597702, |
|
"eval_rewards/accuracies": 0.9375, |
|
"eval_rewards/generated": -0.06012633815407753, |
|
"eval_rewards/margins": 0.4457506537437439, |
|
"eval_rewards/real": 0.38562434911727905, |
|
"eval_runtime": 467.7656, |
|
"eval_samples_per_second": 4.276, |
|
"eval_steps_per_second": 1.069, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3380782918149466e-07, |
|
"logits/generated": 0.23081421852111816, |
|
"logits/real": -0.17550137639045715, |
|
"logps/generated": -508.828369140625, |
|
"logps/real": -436.1839904785156, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.056590236723423004, |
|
"rewards/margins": 0.44170355796813965, |
|
"rewards/real": 0.38511329889297485, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3202846975088965e-07, |
|
"logits/generated": 0.2366132289171219, |
|
"logits/real": -0.2016211748123169, |
|
"logps/generated": -530.8549194335938, |
|
"logps/real": -485.4046325683594, |
|
"loss": 0.1242, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.06151513382792473, |
|
"rewards/margins": 0.43632203340530396, |
|
"rewards/real": 0.3748069107532501, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.302491103202847e-07, |
|
"logits/generated": 0.2326919585466385, |
|
"logits/real": -0.1740848273038864, |
|
"logps/generated": -510.0120544433594, |
|
"logps/real": -443.95330810546875, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.05962473154067993, |
|
"rewards/margins": 0.4408292770385742, |
|
"rewards/real": 0.38120460510253906, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.284697508896797e-07, |
|
"logits/generated": 0.21932096779346466, |
|
"logits/real": -0.19047066569328308, |
|
"logps/generated": -500.6233825683594, |
|
"logps/real": -449.2893981933594, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.03953690081834793, |
|
"rewards/margins": 0.43253955245018005, |
|
"rewards/real": 0.3930026888847351, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2669039145907472e-07, |
|
"logits/generated": 0.24997837841510773, |
|
"logits/real": -0.1812470406293869, |
|
"logps/generated": -535.9525146484375, |
|
"logps/real": -481.9747009277344, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.05714315176010132, |
|
"rewards/margins": 0.4409334659576416, |
|
"rewards/real": 0.3837903141975403, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2491103202846974e-07, |
|
"logits/generated": 0.20015868544578552, |
|
"logits/real": -0.21435590088367462, |
|
"logps/generated": -501.24798583984375, |
|
"logps/real": -457.22003173828125, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.03714684024453163, |
|
"rewards/margins": 0.3973380923271179, |
|
"rewards/real": 0.3601912558078766, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2313167259786475e-07, |
|
"logits/generated": 0.21653036773204803, |
|
"logits/real": -0.20086181163787842, |
|
"logps/generated": -521.1724853515625, |
|
"logps/real": -462.53448486328125, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.04712541028857231, |
|
"rewards/margins": 0.43625393509864807, |
|
"rewards/real": 0.3891284763813019, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2135231316725976e-07, |
|
"logits/generated": 0.2084265649318695, |
|
"logits/real": -0.1898992955684662, |
|
"logps/generated": -528.6702880859375, |
|
"logps/real": -452.29473876953125, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.05745628476142883, |
|
"rewards/margins": 0.4446054995059967, |
|
"rewards/real": 0.38714921474456787, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.195729537366548e-07, |
|
"logits/generated": 0.22444197535514832, |
|
"logits/real": -0.16234365105628967, |
|
"logps/generated": -524.1942749023438, |
|
"logps/real": -446.8998107910156, |
|
"loss": 0.1226, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.04743706434965134, |
|
"rewards/margins": 0.4473690390586853, |
|
"rewards/real": 0.39993205666542053, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1779359430604982e-07, |
|
"logits/generated": 0.25939130783081055, |
|
"logits/real": -0.14666108787059784, |
|
"logps/generated": -547.7173461914062, |
|
"logps/real": -455.5703125, |
|
"loss": 0.1219, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.07788354158401489, |
|
"rewards/margins": 0.47866588830947876, |
|
"rewards/real": 0.4007822871208191, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.160142348754448e-07, |
|
"logits/generated": 0.2458912432193756, |
|
"logits/real": -0.17810478806495667, |
|
"logps/generated": -541.607666015625, |
|
"logps/real": -476.3172912597656, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.060118354856967926, |
|
"rewards/margins": 0.4450649619102478, |
|
"rewards/real": 0.38494664430618286, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1423487544483985e-07, |
|
"logits/generated": 0.22042623162269592, |
|
"logits/real": -0.17981268465518951, |
|
"logps/generated": -528.3734130859375, |
|
"logps/real": -457.7159729003906, |
|
"loss": 0.124, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/generated": -0.05706105753779411, |
|
"rewards/margins": 0.4376358091831207, |
|
"rewards/real": 0.3805747330188751, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1245551601423486e-07, |
|
"logits/generated": 0.23842720687389374, |
|
"logits/real": -0.18174445629119873, |
|
"logps/generated": -519.1848754882812, |
|
"logps/real": -455.0909729003906, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.059157829731702805, |
|
"rewards/margins": 0.4687219560146332, |
|
"rewards/real": 0.4095641076564789, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1067615658362988e-07, |
|
"logits/generated": 0.23483705520629883, |
|
"logits/real": -0.1849731206893921, |
|
"logps/generated": -523.4705200195312, |
|
"logps/real": -474.61761474609375, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.047459594905376434, |
|
"rewards/margins": 0.43443241715431213, |
|
"rewards/real": 0.3869728147983551, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0889679715302492e-07, |
|
"logits/generated": 0.21335165202617645, |
|
"logits/real": -0.1933227926492691, |
|
"logps/generated": -509.9913635253906, |
|
"logps/real": -453.110107421875, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.04094861447811127, |
|
"rewards/margins": 0.4360763430595398, |
|
"rewards/real": 0.3951277732849121, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.071174377224199e-07, |
|
"logits/generated": 0.22970063984394073, |
|
"logits/real": -0.16639523208141327, |
|
"logps/generated": -530.9884033203125, |
|
"logps/real": -476.9608459472656, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.055165134370326996, |
|
"rewards/margins": 0.4414592683315277, |
|
"rewards/real": 0.3862941563129425, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0533807829181492e-07, |
|
"logits/generated": 0.23509879410266876, |
|
"logits/real": -0.17442622780799866, |
|
"logps/generated": -527.2897338867188, |
|
"logps/real": -449.77783203125, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.0715959221124649, |
|
"rewards/margins": 0.45488494634628296, |
|
"rewards/real": 0.38328900933265686, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0355871886120996e-07, |
|
"logits/generated": 0.22457948327064514, |
|
"logits/real": -0.16395524144172668, |
|
"logps/generated": -541.068359375, |
|
"logps/real": -445.13232421875, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.07380439341068268, |
|
"rewards/margins": 0.46186861395835876, |
|
"rewards/real": 0.3880642354488373, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0177935943060498e-07, |
|
"logits/generated": 0.233793705701828, |
|
"logits/real": -0.17521928250789642, |
|
"logps/generated": -499.760009765625, |
|
"logps/real": -418.1783142089844, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.03161894157528877, |
|
"rewards/margins": 0.43319034576416016, |
|
"rewards/real": 0.4015713632106781, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2e-07, |
|
"logits/generated": 0.24164748191833496, |
|
"logits/real": -0.17329975962638855, |
|
"logps/generated": -509.35968017578125, |
|
"logps/real": -438.0674743652344, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.04283856227993965, |
|
"rewards/margins": 0.43587201833724976, |
|
"rewards/real": 0.3930334448814392, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.98220640569395e-07, |
|
"logits/generated": 0.24707035720348358, |
|
"logits/real": -0.19441106915473938, |
|
"logps/generated": -523.8375244140625, |
|
"logps/real": -466.98876953125, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.059231944382190704, |
|
"rewards/margins": 0.4405438005924225, |
|
"rewards/real": 0.38131183385849, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9644128113879002e-07, |
|
"logits/generated": 0.21787595748901367, |
|
"logits/real": -0.1625303477048874, |
|
"logps/generated": -511.28656005859375, |
|
"logps/real": -433.72943115234375, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.04570590704679489, |
|
"rewards/margins": 0.4410758912563324, |
|
"rewards/real": 0.3953699767589569, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9466192170818503e-07, |
|
"logits/generated": 0.2413444072008133, |
|
"logits/real": -0.13660605251789093, |
|
"logps/generated": -518.0386962890625, |
|
"logps/real": -424.8408203125, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.06294144690036774, |
|
"rewards/margins": 0.4464820325374603, |
|
"rewards/real": 0.3835405707359314, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9288256227758008e-07, |
|
"logits/generated": 0.2317664921283722, |
|
"logits/real": -0.17541226744651794, |
|
"logps/generated": -501.98565673828125, |
|
"logps/real": -448.27301025390625, |
|
"loss": 0.1226, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.03287245333194733, |
|
"rewards/margins": 0.43534618616104126, |
|
"rewards/real": 0.4024737477302551, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.911032028469751e-07, |
|
"logits/generated": 0.2325422316789627, |
|
"logits/real": -0.15674057602882385, |
|
"logps/generated": -521.0519409179688, |
|
"logps/real": -445.3016052246094, |
|
"loss": 0.1231, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.054769836366176605, |
|
"rewards/margins": 0.4464344382286072, |
|
"rewards/real": 0.3916645646095276, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8932384341637008e-07, |
|
"logits/generated": 0.21268360316753387, |
|
"logits/real": -0.2170007973909378, |
|
"logps/generated": -517.19140625, |
|
"logps/real": -450.4443359375, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -0.05523901432752609, |
|
"rewards/margins": 0.442660391330719, |
|
"rewards/real": 0.3874213695526123, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8754448398576512e-07, |
|
"logits/generated": 0.20065391063690186, |
|
"logits/real": -0.18983721733093262, |
|
"logps/generated": -516.1802978515625, |
|
"logps/real": -441.12176513671875, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.04751238971948624, |
|
"rewards/margins": 0.44967880845069885, |
|
"rewards/real": 0.4021664261817932, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8576512455516013e-07, |
|
"logits/generated": 0.23106245696544647, |
|
"logits/real": -0.12498452514410019, |
|
"logps/generated": -529.2099609375, |
|
"logps/real": -408.1026916503906, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.05271226167678833, |
|
"rewards/margins": 0.4329907298088074, |
|
"rewards/real": 0.38027846813201904, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8398576512455515e-07, |
|
"logits/generated": 0.2200096845626831, |
|
"logits/real": -0.16534483432769775, |
|
"logps/generated": -525.7679443359375, |
|
"logps/real": -423.7900390625, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/generated": -0.07986018806695938, |
|
"rewards/margins": 0.4750327467918396, |
|
"rewards/real": 0.3951725959777832, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.822064056939502e-07, |
|
"logits/generated": 0.21786005795001984, |
|
"logits/real": -0.12979988753795624, |
|
"logps/generated": -517.2242431640625, |
|
"logps/real": -415.1058654785156, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.044034309685230255, |
|
"rewards/margins": 0.4487292170524597, |
|
"rewards/real": 0.40469488501548767, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8042704626334518e-07, |
|
"logits/generated": 0.19789546728134155, |
|
"logits/real": -0.18105196952819824, |
|
"logps/generated": -516.8419799804688, |
|
"logps/real": -411.0154724121094, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.07462586462497711, |
|
"rewards/margins": 0.4560207426548004, |
|
"rewards/real": 0.3813949227333069, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.786476868327402e-07, |
|
"logits/generated": 0.23341619968414307, |
|
"logits/real": -0.16988156735897064, |
|
"logps/generated": -524.7384643554688, |
|
"logps/real": -435.165771484375, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.04136037454009056, |
|
"rewards/margins": 0.41689807176589966, |
|
"rewards/real": 0.3755376935005188, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7686832740213523e-07, |
|
"logits/generated": 0.21485109627246857, |
|
"logits/real": -0.22060473263263702, |
|
"logps/generated": -524.8414916992188, |
|
"logps/real": -476.2647399902344, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.04514962434768677, |
|
"rewards/margins": 0.43863219022750854, |
|
"rewards/real": 0.39348262548446655, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7508896797153025e-07, |
|
"logits/generated": 0.2269206941127777, |
|
"logits/real": -0.1672908365726471, |
|
"logps/generated": -529.927978515625, |
|
"logps/real": -418.10711669921875, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.061279088258743286, |
|
"rewards/margins": 0.4455617070198059, |
|
"rewards/real": 0.384282648563385, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7330960854092526e-07, |
|
"logits/generated": 0.23159179091453552, |
|
"logits/real": -0.21682953834533691, |
|
"logps/generated": -519.7667236328125, |
|
"logps/real": -459.9512634277344, |
|
"loss": 0.1242, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/generated": -0.05122289061546326, |
|
"rewards/margins": 0.4344315528869629, |
|
"rewards/real": 0.383208692073822, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7153024911032028e-07, |
|
"logits/generated": 0.22269535064697266, |
|
"logits/real": -0.19031396508216858, |
|
"logps/generated": -514.4034423828125, |
|
"logps/real": -453.30755615234375, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.036404937505722046, |
|
"rewards/margins": 0.4338456988334656, |
|
"rewards/real": 0.3974407911300659, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.697508896797153e-07, |
|
"logits/generated": 0.23956215381622314, |
|
"logits/real": -0.15907862782478333, |
|
"logps/generated": -536.2980346679688, |
|
"logps/real": -470.60064697265625, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.07160317897796631, |
|
"rewards/margins": 0.4722219407558441, |
|
"rewards/real": 0.4006187319755554, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.679715302491103e-07, |
|
"logits/generated": 0.2078259289264679, |
|
"logits/real": -0.2215595692396164, |
|
"logps/generated": -505.8096618652344, |
|
"logps/real": -450.19012451171875, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.060281794518232346, |
|
"rewards/margins": 0.43966370820999146, |
|
"rewards/real": 0.3793818950653076, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6619217081850535e-07, |
|
"logits/generated": 0.220962256193161, |
|
"logits/real": -0.17713703215122223, |
|
"logps/generated": -537.6134643554688, |
|
"logps/real": -442.96917724609375, |
|
"loss": 0.1226, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.07121272385120392, |
|
"rewards/margins": 0.46468058228492737, |
|
"rewards/real": 0.39346787333488464, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6441281138790033e-07, |
|
"logits/generated": 0.2232249677181244, |
|
"logits/real": -0.23343662917613983, |
|
"logps/generated": -530.177978515625, |
|
"logps/real": -474.0650939941406, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.07620830088853836, |
|
"rewards/margins": 0.4571600556373596, |
|
"rewards/real": 0.38095176219940186, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6263345195729535e-07, |
|
"logits/generated": 0.22311291098594666, |
|
"logits/real": -0.1836051195859909, |
|
"logps/generated": -513.8787841796875, |
|
"logps/real": -432.8919982910156, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.05465202406048775, |
|
"rewards/margins": 0.45078739523887634, |
|
"rewards/real": 0.3961353898048401, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.608540925266904e-07, |
|
"logits/generated": 0.20984773337841034, |
|
"logits/real": -0.2067348212003708, |
|
"logps/generated": -502.0240783691406, |
|
"logps/real": -450.7012634277344, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.053631316870450974, |
|
"rewards/margins": 0.4417099952697754, |
|
"rewards/real": 0.3880787193775177, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.590747330960854e-07, |
|
"logits/generated": 0.2537307143211365, |
|
"logits/real": -0.15498527884483337, |
|
"logps/generated": -517.6680908203125, |
|
"logps/real": -448.23541259765625, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.033959295600652695, |
|
"rewards/margins": 0.42226237058639526, |
|
"rewards/real": 0.38830310106277466, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5729537366548042e-07, |
|
"logits/generated": 0.21758441627025604, |
|
"logits/real": -0.1972777396440506, |
|
"logps/generated": -523.7211303710938, |
|
"logps/real": -442.3697814941406, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.05707954615354538, |
|
"rewards/margins": 0.46116429567337036, |
|
"rewards/real": 0.4040847420692444, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5551601423487543e-07, |
|
"logits/generated": 0.2385714054107666, |
|
"logits/real": -0.15557484328746796, |
|
"logps/generated": -534.7314453125, |
|
"logps/real": -459.919189453125, |
|
"loss": 0.122, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.061738379299640656, |
|
"rewards/margins": 0.46245869994163513, |
|
"rewards/real": 0.40072035789489746, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5373665480427045e-07, |
|
"logits/generated": 0.22526773810386658, |
|
"logits/real": -0.23086409270763397, |
|
"logps/generated": -515.3248291015625, |
|
"logps/real": -480.4952697753906, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.047265470027923584, |
|
"rewards/margins": 0.4348309636116028, |
|
"rewards/real": 0.3875654339790344, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5195729537366546e-07, |
|
"logits/generated": 0.2394159585237503, |
|
"logits/real": -0.1480354219675064, |
|
"logps/generated": -539.8045654296875, |
|
"logps/real": -449.03851318359375, |
|
"loss": 0.1219, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.06060241535305977, |
|
"rewards/margins": 0.46144571900367737, |
|
"rewards/real": 0.40084323287010193, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.501779359430605e-07, |
|
"logits/generated": 0.20694789290428162, |
|
"logits/real": -0.1889125555753708, |
|
"logps/generated": -497.44586181640625, |
|
"logps/real": -446.2994079589844, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.05379456281661987, |
|
"rewards/margins": 0.4522787928581238, |
|
"rewards/real": 0.3984842300415039, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4839857651245552e-07, |
|
"logits/generated": 0.25509268045425415, |
|
"logits/real": -0.1534079909324646, |
|
"logps/generated": -549.9916381835938, |
|
"logps/real": -462.14581298828125, |
|
"loss": 0.1219, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.07747825235128403, |
|
"rewards/margins": 0.47670111060142517, |
|
"rewards/real": 0.39922288060188293, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.466192170818505e-07, |
|
"logits/generated": 0.22593429684638977, |
|
"logits/real": -0.13438035547733307, |
|
"logps/generated": -525.7147216796875, |
|
"logps/real": -415.84014892578125, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.07066598534584045, |
|
"rewards/margins": 0.45950788259506226, |
|
"rewards/real": 0.3888419568538666, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4483985765124555e-07, |
|
"logits/generated": 0.24762868881225586, |
|
"logits/real": -0.15819518268108368, |
|
"logps/generated": -552.5275268554688, |
|
"logps/real": -452.3013610839844, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.05504069849848747, |
|
"rewards/margins": 0.45216450095176697, |
|
"rewards/real": 0.3971238136291504, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4306049822064056e-07, |
|
"logits/generated": 0.2369476854801178, |
|
"logits/real": -0.14361664652824402, |
|
"logps/generated": -522.8367919921875, |
|
"logps/real": -437.43280029296875, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.04768730327486992, |
|
"rewards/margins": 0.44329744577407837, |
|
"rewards/real": 0.39561015367507935, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4128113879003557e-07, |
|
"logits/generated": 0.21606259047985077, |
|
"logits/real": -0.17811116576194763, |
|
"logps/generated": -521.3794555664062, |
|
"logps/real": -449.33306884765625, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.06894604861736298, |
|
"rewards/margins": 0.4709966778755188, |
|
"rewards/real": 0.402050644159317, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3950177935943062e-07, |
|
"logits/generated": 0.2552662491798401, |
|
"logits/real": -0.16689833998680115, |
|
"logps/generated": -517.5098876953125, |
|
"logps/real": -453.36199951171875, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.050110749900341034, |
|
"rewards/margins": 0.43731746077537537, |
|
"rewards/real": 0.38720670342445374, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.377224199288256e-07, |
|
"logits/generated": 0.19484691321849823, |
|
"logits/real": -0.15271511673927307, |
|
"logps/generated": -537.5327758789062, |
|
"logps/real": -430.572509765625, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.07170198112726212, |
|
"rewards/margins": 0.478828489780426, |
|
"rewards/real": 0.4071265161037445, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3594306049822062e-07, |
|
"logits/generated": 0.21546511352062225, |
|
"logits/real": -0.1496323198080063, |
|
"logps/generated": -523.5759887695312, |
|
"logps/real": -446.5785217285156, |
|
"loss": 0.122, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/generated": -0.07428407669067383, |
|
"rewards/margins": 0.4780218005180359, |
|
"rewards/real": 0.40373772382736206, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3416370106761566e-07, |
|
"logits/generated": 0.25350579619407654, |
|
"logits/real": -0.1811196357011795, |
|
"logps/generated": -509.49334716796875, |
|
"logps/real": -469.2283630371094, |
|
"loss": 0.1231, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.04726102203130722, |
|
"rewards/margins": 0.44062432646751404, |
|
"rewards/real": 0.3933633267879486, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3238434163701067e-07, |
|
"logits/generated": 0.19710242748260498, |
|
"logits/real": -0.195444256067276, |
|
"logps/generated": -510.51953125, |
|
"logps/real": -427.2699279785156, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.052245985716581345, |
|
"rewards/margins": 0.44260525703430176, |
|
"rewards/real": 0.3903593122959137, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.306049822064057e-07, |
|
"logits/generated": 0.23108847439289093, |
|
"logits/real": -0.19040106236934662, |
|
"logps/generated": -526.1116943359375, |
|
"logps/real": -457.45245361328125, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -0.07810448855161667, |
|
"rewards/margins": 0.499803364276886, |
|
"rewards/real": 0.4216988682746887, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.288256227758007e-07, |
|
"logits/generated": 0.23808474838733673, |
|
"logits/real": -0.20193028450012207, |
|
"logps/generated": -532.6949462890625, |
|
"logps/real": -462.358642578125, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.05180268734693527, |
|
"rewards/margins": 0.4387210011482239, |
|
"rewards/real": 0.3869183659553528, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/generated": 0.2313157618045807, |
|
"eval_logits/real": -0.1777036488056183, |
|
"eval_logps/generated": -521.87158203125, |
|
"eval_logps/real": -453.7703857421875, |
|
"eval_loss": 0.12271205335855484, |
|
"eval_rewards/accuracies": 0.9415000081062317, |
|
"eval_rewards/generated": -0.05717482790350914, |
|
"eval_rewards/margins": 0.45354583859443665, |
|
"eval_rewards/real": 0.3963710367679596, |
|
"eval_runtime": 467.21, |
|
"eval_samples_per_second": 4.281, |
|
"eval_steps_per_second": 1.07, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2704626334519572e-07, |
|
"logits/generated": 0.22476062178611755, |
|
"logits/real": -0.17389290034770966, |
|
"logps/generated": -521.6498413085938, |
|
"logps/real": -443.27880859375, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.0522431917488575, |
|
"rewards/margins": 0.4405241906642914, |
|
"rewards/real": 0.3882810175418854, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2526690391459073e-07, |
|
"logits/generated": 0.23659996688365936, |
|
"logits/real": -0.19962027668952942, |
|
"logps/generated": -530.6138305664062, |
|
"logps/real": -465.6412658691406, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.08992975950241089, |
|
"rewards/margins": 0.48291119933128357, |
|
"rewards/real": 0.3929814398288727, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2348754448398575e-07, |
|
"logits/generated": 0.24056227505207062, |
|
"logits/real": -0.17281261086463928, |
|
"logps/generated": -519.2171630859375, |
|
"logps/real": -464.92913818359375, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/generated": -0.04756608605384827, |
|
"rewards/margins": 0.43203601241111755, |
|
"rewards/real": 0.3844699263572693, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.217081850533808e-07, |
|
"logits/generated": 0.23706042766571045, |
|
"logits/real": -0.1741897165775299, |
|
"logps/generated": -483.63140869140625, |
|
"logps/real": -462.0602111816406, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.035449109971523285, |
|
"rewards/margins": 0.4352191388607025, |
|
"rewards/real": 0.39976999163627625, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.199288256227758e-07, |
|
"logits/generated": 0.23114576935768127, |
|
"logits/real": -0.14803336560726166, |
|
"logps/generated": -507.52008056640625, |
|
"logps/real": -441.3143615722656, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.04358341172337532, |
|
"rewards/margins": 0.4518836438655853, |
|
"rewards/real": 0.4083002209663391, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1814946619217082e-07, |
|
"logits/generated": 0.23625341057777405, |
|
"logits/real": -0.22369003295898438, |
|
"logps/generated": -506.97796630859375, |
|
"logps/real": -472.6698303222656, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.05624117702245712, |
|
"rewards/margins": 0.44402360916137695, |
|
"rewards/real": 0.3877824544906616, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1637010676156583e-07, |
|
"logits/generated": 0.22912919521331787, |
|
"logits/real": -0.18156306445598602, |
|
"logps/generated": -509.318359375, |
|
"logps/real": -449.29425048828125, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.03210196644067764, |
|
"rewards/margins": 0.4248197078704834, |
|
"rewards/real": 0.39271771907806396, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1459074733096084e-07, |
|
"logits/generated": 0.19215625524520874, |
|
"logits/real": -0.18478159606456757, |
|
"logps/generated": -532.9630737304688, |
|
"logps/real": -453.92376708984375, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.048404209315776825, |
|
"rewards/margins": 0.45223093032836914, |
|
"rewards/real": 0.4038267135620117, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1281138790035587e-07, |
|
"logits/generated": 0.2604306638240814, |
|
"logits/real": -0.16049329936504364, |
|
"logps/generated": -514.5932006835938, |
|
"logps/real": -452.393310546875, |
|
"loss": 0.1231, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.03789561614394188, |
|
"rewards/margins": 0.4359763562679291, |
|
"rewards/real": 0.3980807363986969, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1103202846975089e-07, |
|
"logits/generated": 0.2561563551425934, |
|
"logits/real": -0.18810555338859558, |
|
"logps/generated": -523.1640625, |
|
"logps/real": -469.3626403808594, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.047877997159957886, |
|
"rewards/margins": 0.4385640025138855, |
|
"rewards/real": 0.3906860947608948, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.092526690391459e-07, |
|
"logits/generated": 0.22691071033477783, |
|
"logits/real": -0.16963443160057068, |
|
"logps/generated": -533.8391723632812, |
|
"logps/real": -441.6620178222656, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.06845617294311523, |
|
"rewards/margins": 0.4655072093009949, |
|
"rewards/real": 0.39705103635787964, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0747330960854093e-07, |
|
"logits/generated": 0.23466971516609192, |
|
"logits/real": -0.1737900674343109, |
|
"logps/generated": -523.0357666015625, |
|
"logps/real": -466.3274841308594, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/generated": -0.03422842174768448, |
|
"rewards/margins": 0.43477416038513184, |
|
"rewards/real": 0.40054574608802795, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0569395017793593e-07, |
|
"logits/generated": 0.23214109241962433, |
|
"logits/real": -0.2207547128200531, |
|
"logps/generated": -523.21630859375, |
|
"logps/real": -460.16033935546875, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.05244157463312149, |
|
"rewards/margins": 0.4536016881465912, |
|
"rewards/real": 0.4011601507663727, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0391459074733096e-07, |
|
"logits/generated": 0.22818417847156525, |
|
"logits/real": -0.18806496262550354, |
|
"logps/generated": -534.0014038085938, |
|
"logps/real": -461.1249084472656, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.08126085996627808, |
|
"rewards/margins": 0.4781245291233063, |
|
"rewards/real": 0.3968636393547058, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0213523131672597e-07, |
|
"logits/generated": 0.22448763251304626, |
|
"logits/real": -0.19211895763874054, |
|
"logps/generated": -502.9695739746094, |
|
"logps/real": -461.84539794921875, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.04918776452541351, |
|
"rewards/margins": 0.4540463387966156, |
|
"rewards/real": 0.4048585891723633, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0035587188612099e-07, |
|
"logits/generated": 0.2139461785554886, |
|
"logits/real": -0.1837858110666275, |
|
"logps/generated": -507.08953857421875, |
|
"logps/real": -462.7340393066406, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.04183631390333176, |
|
"rewards/margins": 0.42796725034713745, |
|
"rewards/real": 0.3861309885978699, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.857651245551602e-08, |
|
"logits/generated": 0.2099412977695465, |
|
"logits/real": -0.2005094587802887, |
|
"logps/generated": -509.3089294433594, |
|
"logps/real": -458.82098388671875, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.0423799529671669, |
|
"rewards/margins": 0.4318648874759674, |
|
"rewards/real": 0.3894849121570587, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.679715302491103e-08, |
|
"logits/generated": 0.24018998444080353, |
|
"logits/real": -0.16420888900756836, |
|
"logps/generated": -515.2698974609375, |
|
"logps/real": -465.8897399902344, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.04603344202041626, |
|
"rewards/margins": 0.4456867277622223, |
|
"rewards/real": 0.3996533453464508, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.501779359430604e-08, |
|
"logits/generated": 0.2142796516418457, |
|
"logits/real": -0.23481447994709015, |
|
"logps/generated": -517.4632568359375, |
|
"logps/real": -457.7532653808594, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.06851876527070999, |
|
"rewards/margins": 0.46548646688461304, |
|
"rewards/real": 0.39696773886680603, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.323843416370106e-08, |
|
"logits/generated": 0.21380500495433807, |
|
"logits/real": -0.1961486041545868, |
|
"logps/generated": -520.6165771484375, |
|
"logps/real": -449.50152587890625, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.057625479996204376, |
|
"rewards/margins": 0.4460286498069763, |
|
"rewards/real": 0.3884032368659973, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.145907473309609e-08, |
|
"logits/generated": 0.23146884143352509, |
|
"logits/real": -0.1590948849916458, |
|
"logps/generated": -525.8096923828125, |
|
"logps/real": -447.4246520996094, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.06719191372394562, |
|
"rewards/margins": 0.43803900480270386, |
|
"rewards/real": 0.3708471357822418, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.96797153024911e-08, |
|
"logits/generated": 0.24106153845787048, |
|
"logits/real": -0.17956070601940155, |
|
"logps/generated": -509.88909912109375, |
|
"logps/real": -451.45361328125, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.053158439695835114, |
|
"rewards/margins": 0.4641904830932617, |
|
"rewards/real": 0.411032110452652, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.790035587188612e-08, |
|
"logits/generated": 0.23294071853160858, |
|
"logits/real": -0.17599083483219147, |
|
"logps/generated": -541.8015747070312, |
|
"logps/real": -449.78802490234375, |
|
"loss": 0.121, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.06215289235115051, |
|
"rewards/margins": 0.4786357283592224, |
|
"rewards/real": 0.4164828360080719, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.612099644128114e-08, |
|
"logits/generated": 0.22666697204113007, |
|
"logits/real": -0.17323556542396545, |
|
"logps/generated": -543.7659912109375, |
|
"logps/real": -444.4297790527344, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/generated": -0.07400919497013092, |
|
"rewards/margins": 0.4550935626029968, |
|
"rewards/real": 0.3810843825340271, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.434163701067614e-08, |
|
"logits/generated": 0.21421749889850616, |
|
"logits/real": -0.2157014161348343, |
|
"logps/generated": -510.8021545410156, |
|
"logps/real": -463.74285888671875, |
|
"loss": 0.1226, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.055482130497694016, |
|
"rewards/margins": 0.45598897337913513, |
|
"rewards/real": 0.4005068242549896, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.256227758007117e-08, |
|
"logits/generated": 0.2465168535709381, |
|
"logits/real": -0.19514866173267365, |
|
"logps/generated": -503.04339599609375, |
|
"logps/real": -482.16339111328125, |
|
"loss": 0.1231, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.05232943966984749, |
|
"rewards/margins": 0.4523092210292816, |
|
"rewards/real": 0.3999797999858856, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.07829181494662e-08, |
|
"logits/generated": 0.2046203315258026, |
|
"logits/real": -0.22382350265979767, |
|
"logps/generated": -516.7283935546875, |
|
"logps/real": -462.3814392089844, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.061030227690935135, |
|
"rewards/margins": 0.4473925530910492, |
|
"rewards/real": 0.3863622844219208, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.90035587188612e-08, |
|
"logits/generated": 0.23818406462669373, |
|
"logits/real": -0.13241663575172424, |
|
"logps/generated": -552.3175659179688, |
|
"logps/real": -429.982666015625, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.0787079781293869, |
|
"rewards/margins": 0.46521496772766113, |
|
"rewards/real": 0.38650697469711304, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.722419928825623e-08, |
|
"logits/generated": 0.22872892022132874, |
|
"logits/real": -0.1680324524641037, |
|
"logps/generated": -528.6109619140625, |
|
"logps/real": -445.991455078125, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.0603509321808815, |
|
"rewards/margins": 0.462471067905426, |
|
"rewards/real": 0.40212011337280273, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.544483985765124e-08, |
|
"logits/generated": 0.24162690341472626, |
|
"logits/real": -0.16354595124721527, |
|
"logps/generated": -515.4553833007812, |
|
"logps/real": -447.14892578125, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.04346945881843567, |
|
"rewards/margins": 0.4477364122867584, |
|
"rewards/real": 0.40426692366600037, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.366548042704626e-08, |
|
"logits/generated": 0.21535074710845947, |
|
"logits/real": -0.19052883982658386, |
|
"logps/generated": -536.5697021484375, |
|
"logps/real": -434.23016357421875, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.05363154411315918, |
|
"rewards/margins": 0.4495977461338043, |
|
"rewards/real": 0.39596620202064514, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.188612099644127e-08, |
|
"logits/generated": 0.23940858244895935, |
|
"logits/real": -0.153304785490036, |
|
"logps/generated": -519.3319702148438, |
|
"logps/real": -430.232666015625, |
|
"loss": 0.1217, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.07525327801704407, |
|
"rewards/margins": 0.48004379868507385, |
|
"rewards/real": 0.4047905504703522, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.01067615658363e-08, |
|
"logits/generated": 0.26696276664733887, |
|
"logits/real": -0.1220448836684227, |
|
"logps/generated": -520.37060546875, |
|
"logps/real": -437.6148376464844, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.03939700126647949, |
|
"rewards/margins": 0.44409412145614624, |
|
"rewards/real": 0.4046971797943115, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.832740213523131e-08, |
|
"logits/generated": 0.26054686307907104, |
|
"logits/real": -0.16391593217849731, |
|
"logps/generated": -517.5562744140625, |
|
"logps/real": -444.6243591308594, |
|
"loss": 0.1236, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/generated": -0.03623567894101143, |
|
"rewards/margins": 0.4285515248775482, |
|
"rewards/real": 0.3923158347606659, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.654804270462633e-08, |
|
"logits/generated": 0.19792599976062775, |
|
"logits/real": -0.18831519782543182, |
|
"logps/generated": -526.7872924804688, |
|
"logps/real": -425.1585388183594, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.060060203075408936, |
|
"rewards/margins": 0.45477867126464844, |
|
"rewards/real": 0.3947184681892395, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.476868327402136e-08, |
|
"logits/generated": 0.25231215357780457, |
|
"logits/real": -0.12078098207712173, |
|
"logps/generated": -523.71435546875, |
|
"logps/real": -404.197509765625, |
|
"loss": 0.1216, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.055614911019802094, |
|
"rewards/margins": 0.4722030758857727, |
|
"rewards/real": 0.4165882170200348, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.298932384341636e-08, |
|
"logits/generated": 0.22361817955970764, |
|
"logits/real": -0.1951177716255188, |
|
"logps/generated": -527.4632568359375, |
|
"logps/real": -449.739990234375, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.04769791662693024, |
|
"rewards/margins": 0.44549164175987244, |
|
"rewards/real": 0.397793710231781, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.120996441281139e-08, |
|
"logits/generated": 0.235061377286911, |
|
"logits/real": -0.19142548739910126, |
|
"logps/generated": -500.3617248535156, |
|
"logps/real": -432.40045166015625, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.03681398928165436, |
|
"rewards/margins": 0.4336830675601959, |
|
"rewards/real": 0.396869033575058, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.94306049822064e-08, |
|
"logits/generated": 0.23789739608764648, |
|
"logits/real": -0.20226213335990906, |
|
"logps/generated": -524.2302856445312, |
|
"logps/real": -473.21124267578125, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/generated": -0.04299360513687134, |
|
"rewards/margins": 0.41463667154312134, |
|
"rewards/real": 0.3716430068016052, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.765124555160142e-08, |
|
"logits/generated": 0.209858700633049, |
|
"logits/real": -0.17679789662361145, |
|
"logps/generated": -527.78076171875, |
|
"logps/real": -436.3880310058594, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.07421739399433136, |
|
"rewards/margins": 0.49320459365844727, |
|
"rewards/real": 0.41898712515830994, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.587188612099644e-08, |
|
"logits/generated": 0.22804374992847443, |
|
"logits/real": -0.17837099730968475, |
|
"logps/generated": -531.051513671875, |
|
"logps/real": -454.724609375, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.0480850450694561, |
|
"rewards/margins": 0.4538131356239319, |
|
"rewards/real": 0.4057280421257019, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.4092526690391456e-08, |
|
"logits/generated": 0.23690180480480194, |
|
"logits/real": -0.19386790692806244, |
|
"logps/generated": -530.9876098632812, |
|
"logps/real": -470.317138671875, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.055680133402347565, |
|
"rewards/margins": 0.45267653465270996, |
|
"rewards/real": 0.3969964385032654, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.231316725978648e-08, |
|
"logits/generated": 0.2339600771665573, |
|
"logits/real": -0.10314682871103287, |
|
"logps/generated": -542.47265625, |
|
"logps/real": -420.5494079589844, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -0.0906255766749382, |
|
"rewards/margins": 0.5058237910270691, |
|
"rewards/real": 0.4151982367038727, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.053380782918149e-08, |
|
"logits/generated": 0.24456700682640076, |
|
"logits/real": -0.1966424584388733, |
|
"logps/generated": -538.0858154296875, |
|
"logps/real": -469.58837890625, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -0.06548374891281128, |
|
"rewards/margins": 0.4630776345729828, |
|
"rewards/real": 0.3975939154624939, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.8754448398576507e-08, |
|
"logits/generated": 0.23372788727283478, |
|
"logits/real": -0.18431547284126282, |
|
"logps/generated": -533.2066650390625, |
|
"logps/real": -444.8937072753906, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.05459724739193916, |
|
"rewards/margins": 0.4527161717414856, |
|
"rewards/real": 0.39811891317367554, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.697508896797153e-08, |
|
"logits/generated": 0.23714499175548553, |
|
"logits/real": -0.1850479245185852, |
|
"logps/generated": -504.242431640625, |
|
"logps/real": -466.57415771484375, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.038482390344142914, |
|
"rewards/margins": 0.418579638004303, |
|
"rewards/real": 0.38009724020957947, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.519572953736655e-08, |
|
"logits/generated": 0.21894851326942444, |
|
"logits/real": -0.28000861406326294, |
|
"logps/generated": -500.18115234375, |
|
"logps/real": -503.328369140625, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.05481521040201187, |
|
"rewards/margins": 0.4618280529975891, |
|
"rewards/real": 0.40701285004615784, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.341637010676156e-08, |
|
"logits/generated": 0.23817463219165802, |
|
"logits/real": -0.12684933841228485, |
|
"logps/generated": -529.4266357421875, |
|
"logps/real": -412.86907958984375, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.05582699924707413, |
|
"rewards/margins": 0.4807185232639313, |
|
"rewards/real": 0.42489147186279297, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.1637010676156584e-08, |
|
"logits/generated": 0.244916170835495, |
|
"logits/real": -0.18898700177669525, |
|
"logps/generated": -527.6209716796875, |
|
"logps/real": -472.571533203125, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.04574694111943245, |
|
"rewards/margins": 0.4711214601993561, |
|
"rewards/real": 0.42537444829940796, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.98576512455516e-08, |
|
"logits/generated": 0.209895521402359, |
|
"logits/real": -0.13670508563518524, |
|
"logps/generated": -502.51922607421875, |
|
"logps/real": -424.85418701171875, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.02930234745144844, |
|
"rewards/margins": 0.4223732054233551, |
|
"rewards/real": 0.39307087659835815, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.807829181494661e-08, |
|
"logits/generated": 0.21665044128894806, |
|
"logits/real": -0.1499323546886444, |
|
"logps/generated": -528.1993408203125, |
|
"logps/real": -411.34130859375, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.053298868238925934, |
|
"rewards/margins": 0.46004271507263184, |
|
"rewards/real": 0.4067438244819641, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.6298932384341634e-08, |
|
"logits/generated": 0.22750067710876465, |
|
"logits/real": -0.18200306594371796, |
|
"logps/generated": -515.8609008789062, |
|
"logps/real": -437.69927978515625, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.037331800907850266, |
|
"rewards/margins": 0.4469398558139801, |
|
"rewards/real": 0.40960803627967834, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4519572953736656e-08, |
|
"logits/generated": 0.22528275847434998, |
|
"logits/real": -0.201947420835495, |
|
"logps/generated": -522.4163208007812, |
|
"logps/real": -452.88427734375, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/generated": -0.05317317321896553, |
|
"rewards/margins": 0.4387715458869934, |
|
"rewards/real": 0.385598361492157, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.274021352313167e-08, |
|
"logits/generated": 0.24460799992084503, |
|
"logits/real": -0.15885956585407257, |
|
"logps/generated": -528.5340576171875, |
|
"logps/real": -438.3102111816406, |
|
"loss": 0.1231, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/generated": -0.056962721049785614, |
|
"rewards/margins": 0.44773292541503906, |
|
"rewards/real": 0.39077019691467285, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.096085409252669e-08, |
|
"logits/generated": 0.23852023482322693, |
|
"logits/real": -0.17653930187225342, |
|
"logps/generated": -513.607666015625, |
|
"logps/real": -450.1045837402344, |
|
"loss": 0.122, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.04695012420415878, |
|
"rewards/margins": 0.45696502923965454, |
|
"rewards/real": 0.41001492738723755, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9181494661921706e-08, |
|
"logits/generated": 0.2057987004518509, |
|
"logits/real": -0.19016572833061218, |
|
"logps/generated": -544.1546630859375, |
|
"logps/real": -443.2815856933594, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/generated": -0.04910854250192642, |
|
"rewards/margins": 0.4497135579586029, |
|
"rewards/real": 0.4006050229072571, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.7402135231316727e-08, |
|
"logits/generated": 0.2639480233192444, |
|
"logits/real": -0.13512758910655975, |
|
"logps/generated": -537.3682861328125, |
|
"logps/real": -450.20721435546875, |
|
"loss": 0.1217, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.07757305353879929, |
|
"rewards/margins": 0.4825238287448883, |
|
"rewards/real": 0.4049507677555084, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.562277580071174e-08, |
|
"logits/generated": 0.22935017943382263, |
|
"logits/real": -0.15518589317798615, |
|
"logps/generated": -541.0477294921875, |
|
"logps/real": -439.45721435546875, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.06261979043483734, |
|
"rewards/margins": 0.4490610957145691, |
|
"rewards/real": 0.38644129037857056, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.384341637010676e-08, |
|
"logits/generated": 0.2474227398633957, |
|
"logits/real": -0.23404696583747864, |
|
"logps/generated": -522.2889404296875, |
|
"logps/real": -471.97772216796875, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.05089433863759041, |
|
"rewards/margins": 0.43490394949913025, |
|
"rewards/real": 0.38400956988334656, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.206405693950178e-08, |
|
"logits/generated": 0.22496287524700165, |
|
"logits/real": -0.1576504409313202, |
|
"logps/generated": -545.5980834960938, |
|
"logps/real": -434.0711975097656, |
|
"loss": 0.1226, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.08410739153623581, |
|
"rewards/margins": 0.47313404083251953, |
|
"rewards/real": 0.3890266716480255, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/generated": 0.227307990193367, |
|
"eval_logits/real": -0.18104109168052673, |
|
"eval_logps/generated": -521.93017578125, |
|
"eval_logps/real": -453.7297668457031, |
|
"eval_loss": 0.12231329083442688, |
|
"eval_rewards/accuracies": 0.9465000033378601, |
|
"eval_rewards/generated": -0.0630280077457428, |
|
"eval_rewards/margins": 0.4634554088115692, |
|
"eval_rewards/real": 0.4004274308681488, |
|
"eval_runtime": 463.6445, |
|
"eval_samples_per_second": 4.314, |
|
"eval_steps_per_second": 1.078, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0284697508896795e-08, |
|
"logits/generated": 0.2287018746137619, |
|
"logits/real": -0.1820969432592392, |
|
"logps/generated": -505.2705078125, |
|
"logps/real": -460.46954345703125, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.05245542526245117, |
|
"rewards/margins": 0.46576395630836487, |
|
"rewards/real": 0.4133085310459137, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.8505338078291812e-08, |
|
"logits/generated": 0.23314492404460907, |
|
"logits/real": -0.18071797490119934, |
|
"logps/generated": -516.2825317382812, |
|
"logps/real": -446.4068908691406, |
|
"loss": 0.1226, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.06283563375473022, |
|
"rewards/margins": 0.463799387216568, |
|
"rewards/real": 0.40096378326416016, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6725978647686833e-08, |
|
"logits/generated": 0.23628249764442444, |
|
"logits/real": -0.19212110340595245, |
|
"logps/generated": -526.8966064453125, |
|
"logps/real": -465.02606201171875, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/generated": -0.06296736001968384, |
|
"rewards/margins": 0.4659939706325531, |
|
"rewards/real": 0.4030265808105469, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.494661921708185e-08, |
|
"logits/generated": 0.22333106398582458, |
|
"logits/real": -0.1808868944644928, |
|
"logps/generated": -537.9901123046875, |
|
"logps/real": -449.5810546875, |
|
"loss": 0.1213, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.03545105457305908, |
|
"rewards/margins": 0.45481738448143005, |
|
"rewards/real": 0.41936635971069336, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3167259786476867e-08, |
|
"logits/generated": 0.203715518116951, |
|
"logits/real": -0.20505018532276154, |
|
"logps/generated": -514.4688720703125, |
|
"logps/real": -435.12103271484375, |
|
"loss": 0.124, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/generated": -0.062085770070552826, |
|
"rewards/margins": 0.444580078125, |
|
"rewards/real": 0.3824942708015442, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1387900355871887e-08, |
|
"logits/generated": 0.24170592427253723, |
|
"logits/real": -0.15465418994426727, |
|
"logps/generated": -538.7681884765625, |
|
"logps/real": -460.85333251953125, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/generated": -0.0660967007279396, |
|
"rewards/margins": 0.47000962495803833, |
|
"rewards/real": 0.4039129316806793, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.608540925266903e-09, |
|
"logits/generated": 0.2076333463191986, |
|
"logits/real": -0.25217288732528687, |
|
"logps/generated": -501.05328369140625, |
|
"logps/real": -470.36285400390625, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -0.05864990875124931, |
|
"rewards/margins": 0.461745023727417, |
|
"rewards/real": 0.4030950963497162, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.82918149466192e-09, |
|
"logits/generated": 0.21803216636180878, |
|
"logits/real": -0.22686973214149475, |
|
"logps/generated": -513.02392578125, |
|
"logps/real": -461.0308532714844, |
|
"loss": 0.1217, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/generated": -0.046993426978588104, |
|
"rewards/margins": 0.4582246243953705, |
|
"rewards/real": 0.41123121976852417, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.049822064056939e-09, |
|
"logits/generated": 0.23574039340019226, |
|
"logits/real": -0.15900997817516327, |
|
"logps/generated": -513.01025390625, |
|
"logps/real": -451.59881591796875, |
|
"loss": 0.121, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/generated": -0.06016629934310913, |
|
"rewards/margins": 0.48446398973464966, |
|
"rewards/real": 0.42429766058921814, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.270462633451957e-09, |
|
"logits/generated": 0.21644452214241028, |
|
"logits/real": -0.22703060507774353, |
|
"logps/generated": -519.6123046875, |
|
"logps/real": -449.2998046875, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/generated": -0.05588964372873306, |
|
"rewards/margins": 0.45251893997192383, |
|
"rewards/real": 0.396629273891449, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.491103202846975e-09, |
|
"logits/generated": 0.22004759311676025, |
|
"logits/real": -0.19879832863807678, |
|
"logps/generated": -508.9859313964844, |
|
"logps/real": -448.991943359375, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/generated": -0.04875675216317177, |
|
"rewards/margins": 0.44634467363357544, |
|
"rewards/real": 0.3975878953933716, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.117437722419929e-10, |
|
"logits/generated": 0.2379693239927292, |
|
"logits/real": -0.15108105540275574, |
|
"logps/generated": -523.8177490234375, |
|
"logps/real": -426.63323974609375, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/generated": -0.03124368190765381, |
|
"rewards/margins": 0.4431038498878479, |
|
"rewards/real": 0.41186007857322693, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1562, |
|
"total_flos": 0.0, |
|
"train_loss": 0.12871409855334615, |
|
"train_runtime": 38810.2478, |
|
"train_samples_per_second": 2.577, |
|
"train_steps_per_second": 0.04 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1562, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|