|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1719, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9069767441860465e-09, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -585.4288330078125, |
|
"logps/real": -416.062255859375, |
|
"loss": 2.0516, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -26.3916072845459, |
|
"rewards/margins": 15.072293281555176, |
|
"rewards/real": -11.319311141967773, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9069767441860464e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -636.1840209960938, |
|
"logps/real": -385.2939453125, |
|
"loss": 3.5146, |
|
"rewards/accuracies": 0.6805555820465088, |
|
"rewards/generated": -20.802650451660156, |
|
"rewards/margins": 6.726568222045898, |
|
"rewards/real": -14.076082229614258, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.813953488372093e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -624.8544311523438, |
|
"logps/real": -390.0328674316406, |
|
"loss": 3.0526, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/generated": -23.875038146972656, |
|
"rewards/margins": 11.404947280883789, |
|
"rewards/real": -12.470093727111816, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.720930232558139e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -569.0723266601562, |
|
"logps/real": -370.02374267578125, |
|
"loss": 3.4523, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -19.19052505493164, |
|
"rewards/margins": 7.997351169586182, |
|
"rewards/real": -11.193175315856934, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1627906976744186e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -579.5658569335938, |
|
"logps/real": -411.82049560546875, |
|
"loss": 3.4731, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/generated": -18.900074005126953, |
|
"rewards/margins": 7.000410556793213, |
|
"rewards/real": -11.899663925170898, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4534883720930232e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -556.8775024414062, |
|
"logps/real": -356.0643310546875, |
|
"loss": 2.9526, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/generated": -19.157987594604492, |
|
"rewards/margins": 6.662176609039307, |
|
"rewards/real": -12.495813369750977, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7441860465116279e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -590.6502075195312, |
|
"logps/real": -382.5639343261719, |
|
"loss": 2.8991, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -20.656909942626953, |
|
"rewards/margins": 8.879560470581055, |
|
"rewards/real": -11.777349472045898, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0348837209302325e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -611.8815307617188, |
|
"logps/real": -361.37445068359375, |
|
"loss": 1.9164, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/generated": -23.409746170043945, |
|
"rewards/margins": 12.026899337768555, |
|
"rewards/real": -11.382845878601074, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3255813953488372e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -572.3549194335938, |
|
"logps/real": -345.2603454589844, |
|
"loss": 2.9112, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -21.22520637512207, |
|
"rewards/margins": 10.52419376373291, |
|
"rewards/real": -10.701011657714844, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.616279069767442e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -617.297119140625, |
|
"logps/real": -376.9234619140625, |
|
"loss": 2.8052, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/generated": -24.289501190185547, |
|
"rewards/margins": 10.610671043395996, |
|
"rewards/real": -13.6788330078125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9069767441860464e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -659.3673095703125, |
|
"logps/real": -397.09442138671875, |
|
"loss": 1.9098, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -30.52129554748535, |
|
"rewards/margins": 15.818142890930176, |
|
"rewards/real": -14.703153610229492, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1976744186046514e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -753.0693359375, |
|
"logps/real": -422.36474609375, |
|
"loss": 1.3731, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -34.5720329284668, |
|
"rewards/margins": 20.563434600830078, |
|
"rewards/real": -14.008600234985352, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4883720930232557e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -754.9771728515625, |
|
"logps/real": -381.25775146484375, |
|
"loss": 0.9719, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -34.03357696533203, |
|
"rewards/margins": 22.683399200439453, |
|
"rewards/real": -11.350174903869629, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.77906976744186e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -775.6074829101562, |
|
"logps/real": -384.8584289550781, |
|
"loss": 1.1633, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -35.74102020263672, |
|
"rewards/margins": 22.70891761779785, |
|
"rewards/real": -13.032096862792969, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.069767441860465e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -747.3997802734375, |
|
"logps/real": -364.2508239746094, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -36.73775863647461, |
|
"rewards/margins": 25.776050567626953, |
|
"rewards/real": -10.961711883544922, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.3604651162790694e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -791.8533935546875, |
|
"logps/real": -391.7900695800781, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -43.14397048950195, |
|
"rewards/margins": 30.07157325744629, |
|
"rewards/real": -13.07239055633545, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6511627906976743e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -838.5374755859375, |
|
"logps/real": -418.5439453125, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -46.2900505065918, |
|
"rewards/margins": 31.113073348999023, |
|
"rewards/real": -15.176969528198242, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.941860465116279e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -877.0693359375, |
|
"logps/real": -374.66131591796875, |
|
"loss": 0.2794, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -49.27599334716797, |
|
"rewards/margins": 36.6387825012207, |
|
"rewards/real": -12.637211799621582, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.974143503555268e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -887.89404296875, |
|
"logps/real": -414.8448791503906, |
|
"loss": 0.3793, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -52.5975341796875, |
|
"rewards/margins": 36.9406852722168, |
|
"rewards/real": -15.65684986114502, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.941822882999354e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -901.4552612304688, |
|
"logps/real": -360.4660339355469, |
|
"loss": 0.3143, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -52.28190994262695, |
|
"rewards/margins": 38.190025329589844, |
|
"rewards/real": -14.091883659362793, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.909502262443438e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1001.7427978515625, |
|
"logps/real": -355.1522521972656, |
|
"loss": 0.3833, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -63.00849533081055, |
|
"rewards/margins": 50.70174026489258, |
|
"rewards/real": -12.306763648986816, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.877181641887524e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1004.9097900390625, |
|
"logps/real": -406.89959716796875, |
|
"loss": 0.2313, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -62.05937576293945, |
|
"rewards/margins": 47.54924011230469, |
|
"rewards/real": -14.51012897491455, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.84486102133161e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -955.6048583984375, |
|
"logps/real": -349.3613586425781, |
|
"loss": 0.2353, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -57.845489501953125, |
|
"rewards/margins": 46.22804641723633, |
|
"rewards/real": -11.617445945739746, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.812540400775695e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1023.615234375, |
|
"logps/real": -367.34765625, |
|
"loss": 0.3127, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -62.7391242980957, |
|
"rewards/margins": 49.16623306274414, |
|
"rewards/real": -13.572894096374512, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.78021978021978e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1015.4310302734375, |
|
"logps/real": -396.3680725097656, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -62.57373046875, |
|
"rewards/margins": 49.165565490722656, |
|
"rewards/real": -13.408163070678711, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.747899159663865e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1024.803955078125, |
|
"logps/real": -364.30267333984375, |
|
"loss": 0.2018, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -63.769630432128906, |
|
"rewards/margins": 52.81931686401367, |
|
"rewards/real": -10.950313568115234, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7155785391079506e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1095.012939453125, |
|
"logps/real": -373.0534973144531, |
|
"loss": 0.2564, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -71.70127868652344, |
|
"rewards/margins": 59.2219352722168, |
|
"rewards/real": -12.479339599609375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.683257918552036e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1077.7110595703125, |
|
"logps/real": -368.59576416015625, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -70.36756896972656, |
|
"rewards/margins": 56.93220901489258, |
|
"rewards/real": -13.435358047485352, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6509372979961214e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1050.4290771484375, |
|
"logps/real": -363.2455749511719, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -65.6029052734375, |
|
"rewards/margins": 54.51910400390625, |
|
"rewards/real": -11.083802223205566, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.618616677440207e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1103.069091796875, |
|
"logps/real": -349.0497131347656, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.5141372680664, |
|
"rewards/margins": 61.3436279296875, |
|
"rewards/real": -11.170514106750488, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5862960568842917e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1056.5870361328125, |
|
"logps/real": -398.55908203125, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -64.16098022460938, |
|
"rewards/margins": 49.70214080810547, |
|
"rewards/real": -14.458834648132324, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5539754363283774e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1053.408203125, |
|
"logps/real": -381.6204528808594, |
|
"loss": 0.1742, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -65.08711242675781, |
|
"rewards/margins": 52.69770431518555, |
|
"rewards/real": -12.389402389526367, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5216548157724625e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1127.9320068359375, |
|
"logps/real": -335.8224182128906, |
|
"loss": 0.0563, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -73.03179931640625, |
|
"rewards/margins": 62.3694953918457, |
|
"rewards/real": -10.662309646606445, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.489334195216548e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1167.7669677734375, |
|
"logps/real": -377.48004150390625, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -75.26861572265625, |
|
"rewards/margins": 63.2924919128418, |
|
"rewards/real": -11.976118087768555, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4570135746606334e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1116.4605712890625, |
|
"logps/real": -348.2418518066406, |
|
"loss": 0.1536, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -72.23265838623047, |
|
"rewards/margins": 60.4597053527832, |
|
"rewards/real": -11.772956848144531, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4246929541047185e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1093.644775390625, |
|
"logps/real": -337.54681396484375, |
|
"loss": 0.136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.0814437866211, |
|
"rewards/margins": 67.3147201538086, |
|
"rewards/real": -8.766722679138184, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3923723335488036e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1109.8046875, |
|
"logps/real": -403.28594970703125, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -70.68810272216797, |
|
"rewards/margins": 57.72324752807617, |
|
"rewards/real": -12.964859008789062, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3600517129928893e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1270.935546875, |
|
"logps/real": -380.7652587890625, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -87.1976089477539, |
|
"rewards/margins": 74.49917602539062, |
|
"rewards/real": -12.698437690734863, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.327731092436975e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1192.004150390625, |
|
"logps/real": -412.72174072265625, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -81.01669311523438, |
|
"rewards/margins": 66.32322692871094, |
|
"rewards/real": -14.693460464477539, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2954104718810596e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1192.2940673828125, |
|
"logps/real": -356.91265869140625, |
|
"loss": 0.071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -81.30943298339844, |
|
"rewards/margins": 68.93203735351562, |
|
"rewards/real": -12.377397537231445, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2630898513251453e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1197.5316162109375, |
|
"logps/real": -417.83807373046875, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -82.4712905883789, |
|
"rewards/margins": 66.75960540771484, |
|
"rewards/real": -15.711677551269531, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2307692307692304e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1340.131591796875, |
|
"logps/real": -362.13726806640625, |
|
"loss": 0.0193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -94.64201354980469, |
|
"rewards/margins": 83.68933868408203, |
|
"rewards/real": -10.952683448791504, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.198448610213316e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1159.6007080078125, |
|
"logps/real": -372.16943359375, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -77.62410736083984, |
|
"rewards/margins": 66.4634017944336, |
|
"rewards/real": -11.160701751708984, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.166127989657401e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1317.0751953125, |
|
"logps/real": -357.87347412109375, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -93.65202331542969, |
|
"rewards/margins": 82.01255798339844, |
|
"rewards/real": -11.639452934265137, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1338073691014864e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1271.5760498046875, |
|
"logps/real": -382.9284973144531, |
|
"loss": 0.0581, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -89.10494232177734, |
|
"rewards/margins": 76.42890930175781, |
|
"rewards/real": -12.67604923248291, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1014867485455715e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1289.350341796875, |
|
"logps/real": -389.0955810546875, |
|
"loss": 0.1462, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -87.45140075683594, |
|
"rewards/margins": 75.87107849121094, |
|
"rewards/real": -11.580324172973633, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.069166127989657e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1280.599853515625, |
|
"logps/real": -371.68017578125, |
|
"loss": 0.059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -92.14776611328125, |
|
"rewards/margins": 80.02830505371094, |
|
"rewards/real": -12.119465827941895, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.036845507433743e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1261.142333984375, |
|
"logps/real": -417.6834411621094, |
|
"loss": 0.2501, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -90.04469299316406, |
|
"rewards/margins": 76.96453857421875, |
|
"rewards/real": -13.080148696899414, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.004524886877828e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1229.6658935546875, |
|
"logps/real": -367.977783203125, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -87.21217346191406, |
|
"rewards/margins": 77.89546966552734, |
|
"rewards/real": -9.316702842712402, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.972204266321913e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1207.0528564453125, |
|
"logps/real": -384.0739440917969, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -81.83928680419922, |
|
"rewards/margins": 72.51162719726562, |
|
"rewards/real": -9.327655792236328, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9398836457659983e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1229.6376953125, |
|
"logps/real": -374.444091796875, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -85.55473327636719, |
|
"rewards/margins": 73.45203399658203, |
|
"rewards/real": -12.102694511413574, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/generated": -Infinity, |
|
"eval_logits/real": -Infinity, |
|
"eval_logps/generated": -876.169189453125, |
|
"eval_logps/real": -258.8595275878906, |
|
"eval_loss": 0.019135773181915283, |
|
"eval_rewards/accuracies": 0.9921875, |
|
"eval_rewards/generated": -52.90799331665039, |
|
"eval_rewards/margins": 52.664222717285156, |
|
"eval_rewards/real": -0.24376755952835083, |
|
"eval_runtime": 54.127, |
|
"eval_samples_per_second": 9.238, |
|
"eval_steps_per_second": 0.296, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.907563025210084e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1148.657470703125, |
|
"logps/real": -329.05780029296875, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -79.50115966796875, |
|
"rewards/margins": 72.13102722167969, |
|
"rewards/real": -7.370133399963379, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.875242404654169e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1202.630615234375, |
|
"logps/real": -361.89556884765625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -82.56840515136719, |
|
"rewards/margins": 70.6963119506836, |
|
"rewards/real": -11.872096061706543, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.842921784098255e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1241.448486328125, |
|
"logps/real": -353.6575927734375, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -86.03369140625, |
|
"rewards/margins": 74.09725189208984, |
|
"rewards/real": -11.936434745788574, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8106011635423394e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1298.4600830078125, |
|
"logps/real": -388.0162658691406, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -92.33797454833984, |
|
"rewards/margins": 79.91946411132812, |
|
"rewards/real": -12.41850471496582, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.778280542986425e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1269.831787109375, |
|
"logps/real": -390.52630615234375, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -88.11750030517578, |
|
"rewards/margins": 75.70476531982422, |
|
"rewards/real": -12.412723541259766, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.745959922430511e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1346.3272705078125, |
|
"logps/real": -368.40753173828125, |
|
"loss": 0.1542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -96.91593933105469, |
|
"rewards/margins": 85.29218292236328, |
|
"rewards/real": -11.623759269714355, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.713639301874596e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1247.3770751953125, |
|
"logps/real": -388.4643859863281, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -88.77118682861328, |
|
"rewards/margins": 76.88682556152344, |
|
"rewards/real": -11.884347915649414, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6813186813186816e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1353.050048828125, |
|
"logps/real": -406.52020263671875, |
|
"loss": 0.044, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -95.89773559570312, |
|
"rewards/margins": 82.00030517578125, |
|
"rewards/real": -13.897436141967773, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.648998060762766e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1425.5377197265625, |
|
"logps/real": -352.7586669921875, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -101.71784973144531, |
|
"rewards/margins": 90.94731140136719, |
|
"rewards/real": -10.770530700683594, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.616677440206852e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1402.710693359375, |
|
"logps/real": -397.00811767578125, |
|
"loss": 0.059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -101.71532440185547, |
|
"rewards/margins": 90.09410095214844, |
|
"rewards/real": -11.621208190917969, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.584356819650937e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1376.6851806640625, |
|
"logps/real": -387.17620849609375, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -98.29673767089844, |
|
"rewards/margins": 84.26631164550781, |
|
"rewards/real": -14.030441284179688, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5520361990950227e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1359.436279296875, |
|
"logps/real": -364.4646911621094, |
|
"loss": 0.0255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -98.44429016113281, |
|
"rewards/margins": 86.33500671386719, |
|
"rewards/real": -12.109277725219727, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.519715578539108e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1330.5174560546875, |
|
"logps/real": -389.8196716308594, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -95.7861099243164, |
|
"rewards/margins": 81.63475036621094, |
|
"rewards/real": -14.151357650756836, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.487394957983193e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1381.9146728515625, |
|
"logps/real": -391.81793212890625, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -100.78105163574219, |
|
"rewards/margins": 86.04339599609375, |
|
"rewards/real": -14.7376708984375, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4550743374272786e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1447.9072265625, |
|
"logps/real": -369.33843994140625, |
|
"loss": 0.064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -107.63130187988281, |
|
"rewards/margins": 94.77674865722656, |
|
"rewards/real": -12.854547500610352, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.422753716871364e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1418.199462890625, |
|
"logps/real": -381.74945068359375, |
|
"loss": 0.0283, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -103.5382308959961, |
|
"rewards/margins": 91.29570770263672, |
|
"rewards/real": -12.24254035949707, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3904330963154494e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1388.025146484375, |
|
"logps/real": -358.68310546875, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -103.21788024902344, |
|
"rewards/margins": 91.6312255859375, |
|
"rewards/real": -11.58665657043457, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.358112475759534e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1355.957763671875, |
|
"logps/real": -396.1421813964844, |
|
"loss": 0.0431, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -98.50040435791016, |
|
"rewards/margins": 85.23402404785156, |
|
"rewards/real": -13.266378402709961, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3257918552036197e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1305.5859375, |
|
"logps/real": -377.94146728515625, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -95.04072570800781, |
|
"rewards/margins": 81.69560241699219, |
|
"rewards/real": -13.345118522644043, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.293471234647705e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1390.365966796875, |
|
"logps/real": -367.5549621582031, |
|
"loss": 0.0323, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -98.4721908569336, |
|
"rewards/margins": 87.40778350830078, |
|
"rewards/real": -11.064404487609863, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2611506140917905e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1216.620849609375, |
|
"logps/real": -388.883056640625, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -87.7797622680664, |
|
"rewards/margins": 74.78289031982422, |
|
"rewards/real": -12.996874809265137, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2288299935358757e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1357.4361572265625, |
|
"logps/real": -399.61346435546875, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -100.69035339355469, |
|
"rewards/margins": 86.53223419189453, |
|
"rewards/real": -14.158121109008789, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.196509372979961e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1464.405029296875, |
|
"logps/real": -382.9228515625, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -110.7691650390625, |
|
"rewards/margins": 99.68670654296875, |
|
"rewards/real": -11.082452774047852, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1641887524240465e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1414.446044921875, |
|
"logps/real": -374.07122802734375, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -101.88261413574219, |
|
"rewards/margins": 89.86210632324219, |
|
"rewards/real": -12.02051067352295, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1318681318681316e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1603.879150390625, |
|
"logps/real": -369.9591369628906, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -120.94793701171875, |
|
"rewards/margins": 108.86351013183594, |
|
"rewards/real": -12.08442497253418, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0995475113122173e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1366.385009765625, |
|
"logps/real": -353.4041442871094, |
|
"loss": 0.1729, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -97.85880279541016, |
|
"rewards/margins": 87.17445373535156, |
|
"rewards/real": -10.684345245361328, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0672268907563024e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1272.14111328125, |
|
"logps/real": -347.1465759277344, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -85.72208404541016, |
|
"rewards/margins": 75.60896301269531, |
|
"rewards/real": -10.11312484741211, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0349062702003876e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1423.6280517578125, |
|
"logps/real": -403.7122802734375, |
|
"loss": 0.1137, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -106.06524658203125, |
|
"rewards/margins": 92.48087310791016, |
|
"rewards/real": -13.584379196166992, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0025856496444727e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1462.460693359375, |
|
"logps/real": -382.37994384765625, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -107.1926498413086, |
|
"rewards/margins": 95.11514282226562, |
|
"rewards/real": -12.077507972717285, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9702650290885584e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1400.0626220703125, |
|
"logps/real": -359.57379150390625, |
|
"loss": 0.016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -100.73733520507812, |
|
"rewards/margins": 88.53291320800781, |
|
"rewards/real": -12.2044095993042, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9379444085326436e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1463.847900390625, |
|
"logps/real": -339.82904052734375, |
|
"loss": 0.054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -108.5577621459961, |
|
"rewards/margins": 97.10833740234375, |
|
"rewards/real": -11.44942855834961, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.905623787976729e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1409.522705078125, |
|
"logps/real": -361.0937805175781, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -103.07427978515625, |
|
"rewards/margins": 90.65509796142578, |
|
"rewards/real": -12.419183731079102, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8733031674208144e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1457.8416748046875, |
|
"logps/real": -379.302734375, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -108.97965240478516, |
|
"rewards/margins": 97.12313079833984, |
|
"rewards/real": -11.856520652770996, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8409825468648995e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1403.0084228515625, |
|
"logps/real": -334.5611267089844, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -100.14570617675781, |
|
"rewards/margins": 89.7511215209961, |
|
"rewards/real": -10.394584655761719, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.808661926308985e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1547.0623779296875, |
|
"logps/real": -387.4636535644531, |
|
"loss": 0.0237, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.20843505859375, |
|
"rewards/margins": 104.43153381347656, |
|
"rewards/real": -13.776911735534668, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7763413057530703e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1471.768310546875, |
|
"logps/real": -413.35833740234375, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -110.3841552734375, |
|
"rewards/margins": 96.55322265625, |
|
"rewards/real": -13.830926895141602, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.744020685197156e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1375.001708984375, |
|
"logps/real": -388.19427490234375, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -100.31910705566406, |
|
"rewards/margins": 88.93052673339844, |
|
"rewards/real": -11.38856315612793, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7117000646412406e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1420.671142578125, |
|
"logps/real": -390.4577331542969, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -105.24947357177734, |
|
"rewards/margins": 92.76331329345703, |
|
"rewards/real": -12.486169815063477, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6793794440853263e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1468.72265625, |
|
"logps/real": -371.2659606933594, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -109.34748840332031, |
|
"rewards/margins": 96.87297821044922, |
|
"rewards/real": -12.474508285522461, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6470588235294114e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1494.6917724609375, |
|
"logps/real": -386.06591796875, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -111.42431640625, |
|
"rewards/margins": 98.02787780761719, |
|
"rewards/real": -13.39643669128418, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.614738202973497e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1476.820556640625, |
|
"logps/real": -340.6576232910156, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -108.3106460571289, |
|
"rewards/margins": 95.98829650878906, |
|
"rewards/real": -12.322352409362793, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.582417582417583e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1344.0067138671875, |
|
"logps/real": -384.3983459472656, |
|
"loss": 0.1928, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -96.03278350830078, |
|
"rewards/margins": 81.01734924316406, |
|
"rewards/real": -15.01544189453125, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5500969618616674e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1489.7938232421875, |
|
"logps/real": -356.8852233886719, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -112.2257080078125, |
|
"rewards/margins": 101.8528823852539, |
|
"rewards/real": -10.37283706665039, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.517776341305753e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1498.08447265625, |
|
"logps/real": -333.7328186035156, |
|
"loss": 0.0196, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -112.68477630615234, |
|
"rewards/margins": 101.74092102050781, |
|
"rewards/real": -10.943849563598633, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.485455720749838e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1487.2841796875, |
|
"logps/real": -388.5537109375, |
|
"loss": 0.0386, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -106.80704498291016, |
|
"rewards/margins": 94.99656677246094, |
|
"rewards/real": -11.810464859008789, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4531351001939233e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1495.3056640625, |
|
"logps/real": -379.88812255859375, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -113.61912536621094, |
|
"rewards/margins": 103.34244537353516, |
|
"rewards/real": -10.276670455932617, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.420814479638009e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1456.389404296875, |
|
"logps/real": -373.63055419921875, |
|
"loss": 0.1025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -105.97420501708984, |
|
"rewards/margins": 93.8226089477539, |
|
"rewards/real": -12.15159797668457, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.388493859082094e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1519.46484375, |
|
"logps/real": -374.6028747558594, |
|
"loss": 0.081, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -115.361083984375, |
|
"rewards/margins": 103.88883209228516, |
|
"rewards/real": -11.472256660461426, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3561732385261796e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1479.944580078125, |
|
"logps/real": -372.4063720703125, |
|
"loss": 0.0542, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -111.03340911865234, |
|
"rewards/margins": 100.03633880615234, |
|
"rewards/real": -10.997079849243164, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.323852617970265e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1450.19384765625, |
|
"logps/real": -364.53216552734375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -106.992431640625, |
|
"rewards/margins": 95.22738647460938, |
|
"rewards/real": -11.765054702758789, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/generated": -Infinity, |
|
"eval_logits/real": -Infinity, |
|
"eval_logps/generated": -1134.443603515625, |
|
"eval_logps/real": -239.62489318847656, |
|
"eval_loss": 0.0013600171077996492, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -78.73544311523438, |
|
"eval_rewards/margins": 80.4151382446289, |
|
"eval_rewards/real": 1.6796952486038208, |
|
"eval_runtime": 53.7395, |
|
"eval_samples_per_second": 9.304, |
|
"eval_steps_per_second": 0.298, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.29153199741435e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1505.414794921875, |
|
"logps/real": -367.946533203125, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.87660217285156, |
|
"rewards/margins": 105.79449462890625, |
|
"rewards/real": -12.082099914550781, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2592113768584355e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1453.390380859375, |
|
"logps/real": -421.10675048828125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -107.93519592285156, |
|
"rewards/margins": 93.87348937988281, |
|
"rewards/real": -14.061718940734863, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.226890756302521e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1499.802490234375, |
|
"logps/real": -332.4186096191406, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -113.83087158203125, |
|
"rewards/margins": 104.75523376464844, |
|
"rewards/real": -9.075627326965332, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1945701357466063e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1523.4775390625, |
|
"logps/real": -329.2354736328125, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -111.63041687011719, |
|
"rewards/margins": 100.53762817382812, |
|
"rewards/real": -11.09280776977539, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1622495151906917e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1604.60400390625, |
|
"logps/real": -332.28125, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.66239929199219, |
|
"rewards/margins": 111.79366302490234, |
|
"rewards/real": -11.868745803833008, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.129928894634777e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1439.7813720703125, |
|
"logps/real": -380.1018981933594, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -102.81221008300781, |
|
"rewards/margins": 91.01803588867188, |
|
"rewards/real": -11.794178009033203, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0976082740788623e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1502.5631103515625, |
|
"logps/real": -377.87078857421875, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -113.04085540771484, |
|
"rewards/margins": 99.44877624511719, |
|
"rewards/real": -13.592073440551758, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0652876535229474e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1426.6593017578125, |
|
"logps/real": -354.6351013183594, |
|
"loss": 0.0273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -107.11717224121094, |
|
"rewards/margins": 95.74311828613281, |
|
"rewards/real": -11.374046325683594, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0329670329670329e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1396.0999755859375, |
|
"logps/real": -357.58099365234375, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -103.55279541015625, |
|
"rewards/margins": 92.6092300415039, |
|
"rewards/real": -10.943570137023926, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0006464124111183e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1366.3839111328125, |
|
"logps/real": -382.16925048828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -97.59391784667969, |
|
"rewards/margins": 85.68504333496094, |
|
"rewards/real": -11.908872604370117, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9683257918552034e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1476.2596435546875, |
|
"logps/real": -328.00311279296875, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -114.4925308227539, |
|
"rewards/margins": 107.18376159667969, |
|
"rewards/real": -7.3087663650512695, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9360051712992888e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1523.17919921875, |
|
"logps/real": -350.80926513671875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.21231842041016, |
|
"rewards/margins": 107.56327819824219, |
|
"rewards/real": -9.649039268493652, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.903684550743374e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1442.433837890625, |
|
"logps/real": -374.6177978515625, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -106.27415466308594, |
|
"rewards/margins": 94.25675964355469, |
|
"rewards/real": -12.017416000366211, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8713639301874596e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1547.759765625, |
|
"logps/real": -388.2961120605469, |
|
"loss": 0.0335, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -115.14979553222656, |
|
"rewards/margins": 103.656982421875, |
|
"rewards/real": -11.492792129516602, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.839043309631545e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1544.578857421875, |
|
"logps/real": -395.2447814941406, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -113.8302001953125, |
|
"rewards/margins": 101.32015991210938, |
|
"rewards/real": -12.51003360748291, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8067226890756302e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1487.8250732421875, |
|
"logps/real": -403.1583557128906, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -114.69364929199219, |
|
"rewards/margins": 102.64971923828125, |
|
"rewards/real": -12.043939590454102, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7744020685197156e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1511.9691162109375, |
|
"logps/real": -398.0124206542969, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -113.04685974121094, |
|
"rewards/margins": 99.53727722167969, |
|
"rewards/real": -13.509580612182617, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7420814479638007e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1435.295166015625, |
|
"logps/real": -357.7347412109375, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -104.58961486816406, |
|
"rewards/margins": 92.06977081298828, |
|
"rewards/real": -12.519845962524414, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7097608274078861e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1525.77587890625, |
|
"logps/real": -375.4105529785156, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.15812683105469, |
|
"rewards/margins": 104.0722427368164, |
|
"rewards/real": -12.085866928100586, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6774402068519713e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1605.22509765625, |
|
"logps/real": -363.2497253417969, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -122.36296081542969, |
|
"rewards/margins": 109.96449279785156, |
|
"rewards/real": -12.398462295532227, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6451195862960567e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1576.1768798828125, |
|
"logps/real": -362.9947509765625, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.62059020996094, |
|
"rewards/margins": 107.40449523925781, |
|
"rewards/real": -10.216104507446289, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6127989657401424e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1494.521240234375, |
|
"logps/real": -343.0533447265625, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -109.72247314453125, |
|
"rewards/margins": 99.89729309082031, |
|
"rewards/real": -9.825179100036621, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5804783451842275e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1424.344482421875, |
|
"logps/real": -378.67254638671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -105.49433898925781, |
|
"rewards/margins": 94.50493621826172, |
|
"rewards/real": -10.989395141601562, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.548157724628313e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1587.80859375, |
|
"logps/real": -405.1655578613281, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.33634948730469, |
|
"rewards/margins": 111.79786682128906, |
|
"rewards/real": -11.538492202758789, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.515837104072398e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1647.1484375, |
|
"logps/real": -350.2432556152344, |
|
"loss": 0.0755, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -127.53840637207031, |
|
"rewards/margins": 115.1009750366211, |
|
"rewards/real": -12.437448501586914, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4835164835164835e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1543.418701171875, |
|
"logps/real": -345.29010009765625, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -112.98878479003906, |
|
"rewards/margins": 101.12002563476562, |
|
"rewards/real": -11.868749618530273, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.451195862960569e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1442.285888671875, |
|
"logps/real": -378.0422668457031, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -103.7689208984375, |
|
"rewards/margins": 90.13929748535156, |
|
"rewards/real": -13.629626274108887, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.418875242404654e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1505.737548828125, |
|
"logps/real": -368.3939514160156, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -112.25019836425781, |
|
"rewards/margins": 99.92918395996094, |
|
"rewards/real": -12.321023941040039, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3865546218487394e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1535.7156982421875, |
|
"logps/real": -399.29071044921875, |
|
"loss": 0.0964, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -113.56538391113281, |
|
"rewards/margins": 100.34688568115234, |
|
"rewards/real": -13.218482971191406, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3542340012928246e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1459.90234375, |
|
"logps/real": -350.92852783203125, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -107.59828186035156, |
|
"rewards/margins": 95.6481704711914, |
|
"rewards/real": -11.950109481811523, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3219133807369102e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1592.28271484375, |
|
"logps/real": -380.5674743652344, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.418701171875, |
|
"rewards/margins": 111.38984680175781, |
|
"rewards/real": -12.028863906860352, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2895927601809956e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1478.0816650390625, |
|
"logps/real": -356.773193359375, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -109.80792236328125, |
|
"rewards/margins": 98.7099609375, |
|
"rewards/real": -11.09797477722168, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2572721396250808e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1485.4163818359375, |
|
"logps/real": -361.04034423828125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -112.02418518066406, |
|
"rewards/margins": 100.07078552246094, |
|
"rewards/real": -11.953384399414062, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.224951519069166e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1670.5823974609375, |
|
"logps/real": -339.33990478515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -129.7960205078125, |
|
"rewards/margins": 118.44244384765625, |
|
"rewards/real": -11.353588104248047, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1926308985132513e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1597.1910400390625, |
|
"logps/real": -377.29229736328125, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -123.081298828125, |
|
"rewards/margins": 109.92012786865234, |
|
"rewards/real": -13.16118335723877, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1603102779573367e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1441.568603515625, |
|
"logps/real": -406.8894348144531, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -109.91035461425781, |
|
"rewards/margins": 95.97262573242188, |
|
"rewards/real": -13.937726020812988, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.127989657401422e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1536.1624755859375, |
|
"logps/real": -396.4678039550781, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -115.73885345458984, |
|
"rewards/margins": 101.73265075683594, |
|
"rewards/real": -14.006208419799805, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0956690368455074e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1565.8385009765625, |
|
"logps/real": -404.0613098144531, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.45894622802734, |
|
"rewards/margins": 104.0071029663086, |
|
"rewards/real": -13.451852798461914, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0633484162895927e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1527.2576904296875, |
|
"logps/real": -366.9310302734375, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -114.03080749511719, |
|
"rewards/margins": 101.37015533447266, |
|
"rewards/real": -12.660650253295898, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.031027795733678e-07, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1538.400634765625, |
|
"logps/real": -369.4671936035156, |
|
"loss": 0.0257, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -119.62967681884766, |
|
"rewards/margins": 106.96687316894531, |
|
"rewards/real": -12.662785530090332, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.987071751777634e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1612.37890625, |
|
"logps/real": -348.5940856933594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.49344635009766, |
|
"rewards/margins": 110.88600158691406, |
|
"rewards/real": -12.607457160949707, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.663865546218488e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1591.698486328125, |
|
"logps/real": -374.79974365234375, |
|
"loss": 0.026, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -121.3034439086914, |
|
"rewards/margins": 109.7343978881836, |
|
"rewards/real": -11.569036483764648, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.340659340659341e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1548.60107421875, |
|
"logps/real": -369.18023681640625, |
|
"loss": 0.027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.30462646484375, |
|
"rewards/margins": 103.0732650756836, |
|
"rewards/real": -14.231346130371094, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.017453135100193e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1586.9119873046875, |
|
"logps/real": -430.03204345703125, |
|
"loss": 0.0141, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -120.7913818359375, |
|
"rewards/margins": 105.49790954589844, |
|
"rewards/real": -15.29345989227295, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.694246929541046e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1536.225830078125, |
|
"logps/real": -376.352783203125, |
|
"loss": 0.0284, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.41569519042969, |
|
"rewards/margins": 105.79057312011719, |
|
"rewards/real": -10.625109672546387, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.371040723981899e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1552.786376953125, |
|
"logps/real": -365.3706359863281, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.2040786743164, |
|
"rewards/margins": 107.033935546875, |
|
"rewards/real": -10.17013168334961, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.047834518422754e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1659.7255859375, |
|
"logps/real": -363.5750427246094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -131.04495239257812, |
|
"rewards/margins": 118.81474304199219, |
|
"rewards/real": -12.230215072631836, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.724628312863607e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1631.27197265625, |
|
"logps/real": -374.7925109863281, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -126.56916809082031, |
|
"rewards/margins": 114.83509826660156, |
|
"rewards/real": -11.734076499938965, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.40142210730446e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1581.739990234375, |
|
"logps/real": -361.7199401855469, |
|
"loss": 0.0214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -120.41544342041016, |
|
"rewards/margins": 111.34599304199219, |
|
"rewards/real": -9.069457054138184, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.078215901745313e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1583.1722412109375, |
|
"logps/real": -396.0815124511719, |
|
"loss": 0.0926, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -121.67060852050781, |
|
"rewards/margins": 108.18394470214844, |
|
"rewards/real": -13.486651420593262, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_logits/generated": -Infinity, |
|
"eval_logits/real": -Infinity, |
|
"eval_logps/generated": -1271.4552001953125, |
|
"eval_logps/real": -248.8362274169922, |
|
"eval_loss": 0.0004725866310764104, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -92.43658447265625, |
|
"eval_rewards/margins": 93.19515228271484, |
|
"eval_rewards/real": 0.758558988571167, |
|
"eval_runtime": 53.1656, |
|
"eval_samples_per_second": 9.405, |
|
"eval_steps_per_second": 0.301, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.755009696186167e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1532.7603759765625, |
|
"logps/real": -394.45245361328125, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -115.71403503417969, |
|
"rewards/margins": 102.07691192626953, |
|
"rewards/real": -13.637112617492676, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.43180349062702e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1559.9749755859375, |
|
"logps/real": -389.63818359375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -121.5301284790039, |
|
"rewards/margins": 106.88127136230469, |
|
"rewards/real": -14.648859024047852, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.108597285067872e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1478.6671142578125, |
|
"logps/real": -392.6995544433594, |
|
"loss": 0.0534, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -109.4936752319336, |
|
"rewards/margins": 95.81269836425781, |
|
"rewards/real": -13.68098258972168, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.785391079508726e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1669.7158203125, |
|
"logps/real": -367.13861083984375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -131.03195190429688, |
|
"rewards/margins": 119.17869567871094, |
|
"rewards/real": -11.853246688842773, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.46218487394958e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1580.840576171875, |
|
"logps/real": -375.2113037109375, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.67564392089844, |
|
"rewards/margins": 104.2962417602539, |
|
"rewards/real": -13.379419326782227, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.1389786683904325e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1616.459716796875, |
|
"logps/real": -423.4244079589844, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -123.4718017578125, |
|
"rewards/margins": 107.97757720947266, |
|
"rewards/real": -15.494239807128906, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.8157724628312865e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1683.770751953125, |
|
"logps/real": -353.0, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -131.78575134277344, |
|
"rewards/margins": 119.87959289550781, |
|
"rewards/real": -11.906171798706055, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.492566257272139e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1539.91357421875, |
|
"logps/real": -374.6343994140625, |
|
"loss": 0.0334, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -118.34747314453125, |
|
"rewards/margins": 106.7379150390625, |
|
"rewards/real": -11.609537124633789, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.169360051712993e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1500.190185546875, |
|
"logps/real": -385.86871337890625, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -111.73441314697266, |
|
"rewards/margins": 98.70021057128906, |
|
"rewards/real": -13.034196853637695, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1669.4632568359375, |
|
"logps/real": -388.4225158691406, |
|
"loss": 0.0252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -128.76541137695312, |
|
"rewards/margins": 115.59422302246094, |
|
"rewards/real": -13.171170234680176, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5229476405946995e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1649.5755615234375, |
|
"logps/real": -381.32904052734375, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -128.5321807861328, |
|
"rewards/margins": 115.7733154296875, |
|
"rewards/real": -12.758865356445312, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.199741435035552e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1625.4625244140625, |
|
"logps/real": -376.5026550292969, |
|
"loss": 0.0568, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -124.13980865478516, |
|
"rewards/margins": 110.08150482177734, |
|
"rewards/real": -14.058306694030762, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.8765352294764057e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1690.6812744140625, |
|
"logps/real": -397.8863220214844, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -131.1649932861328, |
|
"rewards/margins": 116.99076843261719, |
|
"rewards/real": -14.174229621887207, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.553329023917259e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1705.136962890625, |
|
"logps/real": -384.80731201171875, |
|
"loss": 0.0296, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -132.203369140625, |
|
"rewards/margins": 119.72615814208984, |
|
"rewards/real": -12.477212905883789, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2301228183581126e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1615.5211181640625, |
|
"logps/real": -386.2267150878906, |
|
"loss": 0.1029, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -124.50753021240234, |
|
"rewards/margins": 111.1576156616211, |
|
"rewards/real": -13.349906921386719, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.906916612798966e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1568.3372802734375, |
|
"logps/real": -392.12310791015625, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -119.6063003540039, |
|
"rewards/margins": 107.2759780883789, |
|
"rewards/real": -12.330320358276367, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5837104072398187e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1709.5218505859375, |
|
"logps/real": -373.9810791015625, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -135.69493103027344, |
|
"rewards/margins": 122.1058120727539, |
|
"rewards/real": -13.58912467956543, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2605042016806723e-08, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1579.2674560546875, |
|
"logps/real": -364.59881591796875, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -121.725341796875, |
|
"rewards/margins": 111.22846984863281, |
|
"rewards/real": -10.496882438659668, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.372979961215254e-09, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1586.878662109375, |
|
"logps/real": -385.99664306640625, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -121.03648376464844, |
|
"rewards/margins": 108.7036361694336, |
|
"rewards/real": -12.332856178283691, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.140917905623787e-09, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1628.6751708984375, |
|
"logps/real": -391.7861633300781, |
|
"loss": 0.0312, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -121.451171875, |
|
"rewards/margins": 108.03205871582031, |
|
"rewards/real": -13.419122695922852, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9088558500323206e-09, |
|
"logits/generated": -Infinity, |
|
"logits/real": -Infinity, |
|
"logps/generated": -1687.310546875, |
|
"logps/real": -375.3674011230469, |
|
"loss": 0.0478, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -130.00363159179688, |
|
"rewards/margins": 117.36309814453125, |
|
"rewards/real": -12.640539169311523, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1719, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2576274702628765, |
|
"train_runtime": 12051.7731, |
|
"train_samples_per_second": 4.564, |
|
"train_steps_per_second": 0.143 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1719, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|