|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1274, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007849293563579278, |
|
"grad_norm": 6.091750131963952, |
|
"learning_rate": 3.90625e-09, |
|
"logits/chosen": 5914.52099609375, |
|
"logits/rejected": 2785.021484375, |
|
"logps/chosen": -212.45889282226562, |
|
"logps/rejected": -98.59669494628906, |
|
"loss": 2.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007849293563579277, |
|
"grad_norm": 6.059369066138964, |
|
"learning_rate": 3.9062499999999997e-08, |
|
"logits/chosen": 4973.767578125, |
|
"logits/rejected": 4328.24365234375, |
|
"logps/chosen": -204.23040771484375, |
|
"logps/rejected": -179.6959686279297, |
|
"loss": 2.0, |
|
"rewards/accuracies": 0.4351852238178253, |
|
"rewards/chosen": 0.05348973721265793, |
|
"rewards/margins": 0.04383070021867752, |
|
"rewards/rejected": 0.009659038856625557, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 6.2041398306498685, |
|
"learning_rate": 7.812499999999999e-08, |
|
"logits/chosen": 6084.0126953125, |
|
"logits/rejected": 4834.15869140625, |
|
"logps/chosen": -217.233642578125, |
|
"logps/rejected": -196.76730346679688, |
|
"loss": 2.0, |
|
"rewards/accuracies": 0.4833333492279053, |
|
"rewards/chosen": -0.02243190072476864, |
|
"rewards/margins": 0.03131182864308357, |
|
"rewards/rejected": -0.053743720054626465, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.023547880690737835, |
|
"grad_norm": 5.475152737188952, |
|
"learning_rate": 1.1718749999999999e-07, |
|
"logits/chosen": 6084.05908203125, |
|
"logits/rejected": 5104.91748046875, |
|
"logps/chosen": -250.56375122070312, |
|
"logps/rejected": -209.34457397460938, |
|
"loss": 1.9999, |
|
"rewards/accuracies": 0.5333333015441895, |
|
"rewards/chosen": 0.008603035472333431, |
|
"rewards/margins": 0.05558818578720093, |
|
"rewards/rejected": -0.04698514938354492, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 5.690781670053855, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": 5311.8642578125, |
|
"logits/rejected": 4346.90673828125, |
|
"logps/chosen": -211.9870147705078, |
|
"logps/rejected": -181.72384643554688, |
|
"loss": 1.9997, |
|
"rewards/accuracies": 0.5333333015441895, |
|
"rewards/chosen": 0.09225504100322723, |
|
"rewards/margins": 0.12906396389007568, |
|
"rewards/rejected": -0.036808937788009644, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03924646781789639, |
|
"grad_norm": 5.758835415832506, |
|
"learning_rate": 1.9531249999999998e-07, |
|
"logits/chosen": 6425.0869140625, |
|
"logits/rejected": 5042.13916015625, |
|
"logps/chosen": -265.3288879394531, |
|
"logps/rejected": -206.8175048828125, |
|
"loss": 1.9992, |
|
"rewards/accuracies": 0.6666667461395264, |
|
"rewards/chosen": 0.29770660400390625, |
|
"rewards/margins": 0.4155918061733246, |
|
"rewards/rejected": -0.11788525432348251, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 5.552781734195383, |
|
"learning_rate": 2.3437499999999998e-07, |
|
"logits/chosen": 5484.19091796875, |
|
"logits/rejected": 4559.71923828125, |
|
"logps/chosen": -213.75491333007812, |
|
"logps/rejected": -209.1690216064453, |
|
"loss": 1.9986, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 0.24906444549560547, |
|
"rewards/margins": 0.6179634928703308, |
|
"rewards/rejected": -0.36889898777008057, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.054945054945054944, |
|
"grad_norm": 5.377299650407647, |
|
"learning_rate": 2.734375e-07, |
|
"logits/chosen": 5194.39501953125, |
|
"logits/rejected": 4918.55322265625, |
|
"logps/chosen": -178.2979736328125, |
|
"logps/rejected": -177.39535522460938, |
|
"loss": 1.998, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": 0.18295133113861084, |
|
"rewards/margins": 0.8465949892997742, |
|
"rewards/rejected": -0.6636435985565186, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 5.650017423938731, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 5774.41748046875, |
|
"logits/rejected": 5269.755859375, |
|
"logps/chosen": -196.76974487304688, |
|
"logps/rejected": -182.98446655273438, |
|
"loss": 1.9959, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": -0.20117223262786865, |
|
"rewards/margins": 1.1927589178085327, |
|
"rewards/rejected": -1.3939311504364014, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0706436420722135, |
|
"grad_norm": 6.605265759829803, |
|
"learning_rate": 3.5156249999999997e-07, |
|
"logits/chosen": 6039.5634765625, |
|
"logits/rejected": 5181.203125, |
|
"logps/chosen": -220.1410675048828, |
|
"logps/rejected": -190.4438934326172, |
|
"loss": 1.9924, |
|
"rewards/accuracies": 0.7583332657814026, |
|
"rewards/chosen": -1.2588454484939575, |
|
"rewards/margins": 4.741243362426758, |
|
"rewards/rejected": -6.000088691711426, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 7.213834973026546, |
|
"learning_rate": 3.9062499999999997e-07, |
|
"logits/chosen": 5967.2626953125, |
|
"logits/rejected": 5745.4873046875, |
|
"logps/chosen": -213.9269256591797, |
|
"logps/rejected": -208.77261352539062, |
|
"loss": 1.9932, |
|
"rewards/accuracies": 0.6750000715255737, |
|
"rewards/chosen": -3.696242094039917, |
|
"rewards/margins": 5.634647369384766, |
|
"rewards/rejected": -9.330889701843262, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08634222919937205, |
|
"grad_norm": 6.156201133938342, |
|
"learning_rate": 4.2968749999999996e-07, |
|
"logits/chosen": 6470.56884765625, |
|
"logits/rejected": 5289.95849609375, |
|
"logps/chosen": -188.42457580566406, |
|
"logps/rejected": -190.61293029785156, |
|
"loss": 1.9887, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -5.309735298156738, |
|
"rewards/margins": 6.310976982116699, |
|
"rewards/rejected": -11.620712280273438, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 8.999184508333663, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": 6397.328125, |
|
"logits/rejected": 5324.27587890625, |
|
"logps/chosen": -210.2899169921875, |
|
"logps/rejected": -212.7317657470703, |
|
"loss": 1.9865, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -7.453166961669922, |
|
"rewards/margins": 7.11349630355835, |
|
"rewards/rejected": -14.56666374206543, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.10204081632653061, |
|
"grad_norm": 6.546135911338803, |
|
"learning_rate": 4.999962424962166e-07, |
|
"logits/chosen": 6333.814453125, |
|
"logits/rejected": 5863.9287109375, |
|
"logps/chosen": -215.83139038085938, |
|
"logps/rejected": -212.8985137939453, |
|
"loss": 1.9843, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": -6.0229082107543945, |
|
"rewards/margins": 9.231057167053223, |
|
"rewards/rejected": -15.2539644241333, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 7.874723764445264, |
|
"learning_rate": 4.998647417232375e-07, |
|
"logits/chosen": 6198.4814453125, |
|
"logits/rejected": 5459.091796875, |
|
"logps/chosen": -195.5287628173828, |
|
"logps/rejected": -197.17437744140625, |
|
"loss": 1.983, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": -9.861322402954102, |
|
"rewards/margins": 9.721135139465332, |
|
"rewards/rejected": -19.582454681396484, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11773940345368916, |
|
"grad_norm": 9.20471530543848, |
|
"learning_rate": 4.995454786965036e-07, |
|
"logits/chosen": 6376.53857421875, |
|
"logits/rejected": 5328.8974609375, |
|
"logps/chosen": -210.15576171875, |
|
"logps/rejected": -192.92373657226562, |
|
"loss": 1.98, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -8.471376419067383, |
|
"rewards/margins": 12.154645919799805, |
|
"rewards/rejected": -20.626020431518555, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 7.461193923622369, |
|
"learning_rate": 4.990386933279972e-07, |
|
"logits/chosen": 6318.7490234375, |
|
"logits/rejected": 5644.8623046875, |
|
"logps/chosen": -207.67337036132812, |
|
"logps/rejected": -219.74203491210938, |
|
"loss": 1.9794, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": -10.14518928527832, |
|
"rewards/margins": 12.539278984069824, |
|
"rewards/rejected": -22.684467315673828, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13343799058084774, |
|
"grad_norm": 6.508017494074614, |
|
"learning_rate": 4.983447664444096e-07, |
|
"logits/chosen": 6519.24169921875, |
|
"logits/rejected": 5813.3642578125, |
|
"logps/chosen": -220.01766967773438, |
|
"logps/rejected": -216.58914184570312, |
|
"loss": 1.9806, |
|
"rewards/accuracies": 0.6583333611488342, |
|
"rewards/chosen": -10.396989822387695, |
|
"rewards/margins": 9.870583534240723, |
|
"rewards/rejected": -20.267574310302734, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 7.695429851231189, |
|
"learning_rate": 4.97464219500968e-07, |
|
"logits/chosen": 5723.25439453125, |
|
"logits/rejected": 5001.2041015625, |
|
"logps/chosen": -198.26637268066406, |
|
"logps/rejected": -199.6933135986328, |
|
"loss": 1.977, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": -9.738619804382324, |
|
"rewards/margins": 11.793425559997559, |
|
"rewards/rejected": -21.532047271728516, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14913657770800628, |
|
"grad_norm": 8.229494993248766, |
|
"learning_rate": 4.963977141895843e-07, |
|
"logits/chosen": 5866.8330078125, |
|
"logits/rejected": 5040.8291015625, |
|
"logps/chosen": -214.79452514648438, |
|
"logps/rejected": -226.88827514648438, |
|
"loss": 1.9734, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -11.035551071166992, |
|
"rewards/margins": 24.613243103027344, |
|
"rewards/rejected": -35.6487922668457, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 8.986914548041765, |
|
"learning_rate": 4.951460519416227e-07, |
|
"logits/chosen": 5760.6181640625, |
|
"logits/rejected": 5327.140625, |
|
"logps/chosen": -192.12252807617188, |
|
"logps/rejected": -224.0847930908203, |
|
"loss": 1.9722, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -9.578254699707031, |
|
"rewards/margins": 17.835506439208984, |
|
"rewards/rejected": -27.413761138916016, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16483516483516483, |
|
"grad_norm": 11.60946438594492, |
|
"learning_rate": 4.937101733256606e-07, |
|
"logits/chosen": 5221.634765625, |
|
"logits/rejected": 4657.677734375, |
|
"logps/chosen": -167.0458984375, |
|
"logps/rejected": -188.22262573242188, |
|
"loss": 1.9736, |
|
"rewards/accuracies": 0.6666667461395264, |
|
"rewards/chosen": -13.132657051086426, |
|
"rewards/margins": 17.90899085998535, |
|
"rewards/rejected": -31.041645050048828, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 13.95805192206325, |
|
"learning_rate": 4.920911573406924e-07, |
|
"logits/chosen": 6353.7783203125, |
|
"logits/rejected": 5411.5087890625, |
|
"logps/chosen": -207.0021514892578, |
|
"logps/rejected": -192.39125061035156, |
|
"loss": 1.97, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -10.665520668029785, |
|
"rewards/margins": 17.728992462158203, |
|
"rewards/rejected": -28.39451026916504, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18053375196232338, |
|
"grad_norm": 7.164170611764204, |
|
"learning_rate": 4.902902206053098e-07, |
|
"logits/chosen": 5817.7529296875, |
|
"logits/rejected": 5253.7822265625, |
|
"logps/chosen": -201.00804138183594, |
|
"logps/rejected": -212.4803924560547, |
|
"loss": 1.9743, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -13.669303894042969, |
|
"rewards/margins": 19.043621063232422, |
|
"rewards/rejected": -32.71292495727539, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 11.660382561338869, |
|
"learning_rate": 4.883087164434672e-07, |
|
"logits/chosen": 5308.75634765625, |
|
"logits/rejected": 4242.29345703125, |
|
"logps/chosen": -174.8252716064453, |
|
"logps/rejected": -180.3127899169922, |
|
"loss": 1.9672, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -7.196226596832275, |
|
"rewards/margins": 18.427143096923828, |
|
"rewards/rejected": -25.623367309570312, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19623233908948196, |
|
"grad_norm": 8.72617485657044, |
|
"learning_rate": 4.861481338675183e-07, |
|
"logits/chosen": 6269.2236328125, |
|
"logits/rejected": 5571.205078125, |
|
"logps/chosen": -180.53604125976562, |
|
"logps/rejected": -221.07125854492188, |
|
"loss": 1.9743, |
|
"rewards/accuracies": 0.6916667222976685, |
|
"rewards/chosen": -13.392184257507324, |
|
"rewards/margins": 23.172420501708984, |
|
"rewards/rejected": -36.564598083496094, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 10.038046373183738, |
|
"learning_rate": 4.838100964592904e-07, |
|
"logits/chosen": 6411.8017578125, |
|
"logits/rejected": 5188.3544921875, |
|
"logps/chosen": -212.73208618164062, |
|
"logps/rejected": -196.77479553222656, |
|
"loss": 1.9814, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -13.01362419128418, |
|
"rewards/margins": 17.537227630615234, |
|
"rewards/rejected": -30.550851821899414, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2119309262166405, |
|
"grad_norm": 7.978893418055898, |
|
"learning_rate": 4.812963611500339e-07, |
|
"logits/chosen": 6247.86376953125, |
|
"logits/rejected": 6049.94140625, |
|
"logps/chosen": -206.0583038330078, |
|
"logps/rejected": -213.51162719726562, |
|
"loss": 1.962, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -9.302484512329102, |
|
"rewards/margins": 15.580082893371582, |
|
"rewards/rejected": -24.882568359375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 18.46477187944741, |
|
"learning_rate": 4.786088169001671e-07, |
|
"logits/chosen": 5388.15673828125, |
|
"logits/rejected": 4686.12939453125, |
|
"logps/chosen": -176.19607543945312, |
|
"logps/rejected": -213.16305541992188, |
|
"loss": 1.9659, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -13.045190811157227, |
|
"rewards/margins": 30.816421508789062, |
|
"rewards/rejected": -43.861610412597656, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22762951334379905, |
|
"grad_norm": 16.83181121684455, |
|
"learning_rate": 4.7574948327980567e-07, |
|
"logits/chosen": 7499.02490234375, |
|
"logits/rejected": 5559.24609375, |
|
"logps/chosen": -246.99514770507812, |
|
"logps/rejected": -226.33572387695312, |
|
"loss": 1.9613, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -9.966670036315918, |
|
"rewards/margins": 33.853336334228516, |
|
"rewards/rejected": -43.81999969482422, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 10.713215982695514, |
|
"learning_rate": 4.727205089511466e-07, |
|
"logits/chosen": 5454.79150390625, |
|
"logits/rejected": 5429.51171875, |
|
"logps/chosen": -181.81573486328125, |
|
"logps/rejected": -206.89437866210938, |
|
"loss": 1.975, |
|
"rewards/accuracies": 0.6833333969116211, |
|
"rewards/chosen": -14.343295097351074, |
|
"rewards/margins": 22.364656448364258, |
|
"rewards/rejected": -36.707950592041016, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24332810047095763, |
|
"grad_norm": 10.320663018292882, |
|
"learning_rate": 4.6952417005384247e-07, |
|
"logits/chosen": 6112.76806640625, |
|
"logits/rejected": 5453.38134765625, |
|
"logps/chosen": -185.7081298828125, |
|
"logps/rejected": -200.83535766601562, |
|
"loss": 1.9697, |
|
"rewards/accuracies": 0.7666667699813843, |
|
"rewards/chosen": -8.127434730529785, |
|
"rewards/margins": 17.2396297454834, |
|
"rewards/rejected": -25.367061614990234, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 10.476570210269244, |
|
"learning_rate": 4.661628684945851e-07, |
|
"logits/chosen": 6115.5517578125, |
|
"logits/rejected": 5302.81689453125, |
|
"logps/chosen": -209.9392852783203, |
|
"logps/rejected": -239.3898468017578, |
|
"loss": 1.964, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -8.882265090942383, |
|
"rewards/margins": 28.489694595336914, |
|
"rewards/rejected": -37.37196350097656, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25902668759811615, |
|
"grad_norm": 8.453410137621447, |
|
"learning_rate": 4.626391301421782e-07, |
|
"logits/chosen": 5915.05224609375, |
|
"logits/rejected": 5387.32177734375, |
|
"logps/chosen": -203.82455444335938, |
|
"logps/rejected": -205.4024200439453, |
|
"loss": 1.9754, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -9.371356964111328, |
|
"rewards/margins": 18.708255767822266, |
|
"rewards/rejected": -28.079614639282227, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 7.101482635499888, |
|
"learning_rate": 4.5895560292945996e-07, |
|
"logits/chosen": 6163.81689453125, |
|
"logits/rejected": 6304.29736328125, |
|
"logps/chosen": -200.20118713378906, |
|
"logps/rejected": -244.84848022460938, |
|
"loss": 1.9674, |
|
"rewards/accuracies": 0.6916667222976685, |
|
"rewards/chosen": -6.342929363250732, |
|
"rewards/margins": 18.13802719116211, |
|
"rewards/rejected": -24.480958938598633, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27472527472527475, |
|
"grad_norm": 22.517967387414878, |
|
"learning_rate": 4.5511505486349865e-07, |
|
"logits/chosen": 6512.6396484375, |
|
"logits/rejected": 5911.80859375, |
|
"logps/chosen": -203.82406616210938, |
|
"logps/rejected": -247.0866241455078, |
|
"loss": 1.9653, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -8.998370170593262, |
|
"rewards/margins": 31.266225814819336, |
|
"rewards/rejected": -40.26459503173828, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 10.024372865307708, |
|
"learning_rate": 4.5112037194555876e-07, |
|
"logits/chosen": 5994.025390625, |
|
"logits/rejected": 5905.3828125, |
|
"logps/chosen": -195.99481201171875, |
|
"logps/rejected": -248.48153686523438, |
|
"loss": 1.9671, |
|
"rewards/accuracies": 0.7249999642372131, |
|
"rewards/chosen": -18.07135772705078, |
|
"rewards/margins": 31.128345489501953, |
|
"rewards/rejected": -49.199703216552734, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2904238618524333, |
|
"grad_norm": 9.19113626070318, |
|
"learning_rate": 4.4697455600239863e-07, |
|
"logits/chosen": 5450.50537109375, |
|
"logits/rejected": 5144.671875, |
|
"logps/chosen": -196.91354370117188, |
|
"logps/rejected": -200.369873046875, |
|
"loss": 1.9767, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -12.49148941040039, |
|
"rewards/margins": 19.807308197021484, |
|
"rewards/rejected": -32.29880142211914, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 16.222088593597523, |
|
"learning_rate": 4.426807224305315e-07, |
|
"logits/chosen": 6513.00927734375, |
|
"logits/rejected": 5406.20361328125, |
|
"logps/chosen": -236.3499298095703, |
|
"logps/rejected": -216.02804565429688, |
|
"loss": 1.9675, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -8.00889778137207, |
|
"rewards/margins": 25.009592056274414, |
|
"rewards/rejected": -33.01848602294922, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.30612244897959184, |
|
"grad_norm": 16.753657487210614, |
|
"learning_rate": 4.3824209785514326e-07, |
|
"logits/chosen": 6674.41259765625, |
|
"logits/rejected": 5131.07373046875, |
|
"logps/chosen": -222.05276489257812, |
|
"logps/rejected": -223.5919647216797, |
|
"loss": 1.9553, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -6.559300899505615, |
|
"rewards/margins": 37.17955780029297, |
|
"rewards/rejected": -43.73885726928711, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 9.085445983363275, |
|
"learning_rate": 4.3366201770542687e-07, |
|
"logits/chosen": 5777.0849609375, |
|
"logits/rejected": 5671.5537109375, |
|
"logps/chosen": -201.79702758789062, |
|
"logps/rejected": -225.7125701904297, |
|
"loss": 1.9761, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -11.008450508117676, |
|
"rewards/margins": 25.964609146118164, |
|
"rewards/rejected": -36.973060607910156, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3218210361067504, |
|
"grad_norm": 11.33832444227533, |
|
"learning_rate": 4.2894392370815567e-07, |
|
"logits/chosen": 6256.8134765625, |
|
"logits/rejected": 5594.1708984375, |
|
"logps/chosen": -223.0230712890625, |
|
"logps/rejected": -257.01385498046875, |
|
"loss": 1.9476, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -10.536191940307617, |
|
"rewards/margins": 38.025299072265625, |
|
"rewards/rejected": -48.561485290527344, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 16.273251635067584, |
|
"learning_rate": 4.2409136130137845e-07, |
|
"logits/chosen": 5923.02880859375, |
|
"logits/rejected": 5376.9443359375, |
|
"logps/chosen": -220.4603729248047, |
|
"logps/rejected": -229.0534210205078, |
|
"loss": 1.969, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -16.430438995361328, |
|
"rewards/margins": 31.381549835205078, |
|
"rewards/rejected": -47.81198501586914, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.33751962323390894, |
|
"grad_norm": 11.151359651562395, |
|
"learning_rate": 4.1910797697018017e-07, |
|
"logits/chosen": 5696.1357421875, |
|
"logits/rejected": 4776.5791015625, |
|
"logps/chosen": -192.7012939453125, |
|
"logps/rejected": -203.29515075683594, |
|
"loss": 1.9619, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -12.956321716308594, |
|
"rewards/margins": 29.105701446533203, |
|
"rewards/rejected": -42.06201934814453, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 15.676627010168398, |
|
"learning_rate": 4.1399751550651084e-07, |
|
"logits/chosen": 6032.6103515625, |
|
"logits/rejected": 5975.65869140625, |
|
"logps/chosen": -194.45022583007812, |
|
"logps/rejected": -232.76809692382812, |
|
"loss": 1.9601, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -12.056891441345215, |
|
"rewards/margins": 29.027606964111328, |
|
"rewards/rejected": -41.08449935913086, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3532182103610675, |
|
"grad_norm": 11.418135496524267, |
|
"learning_rate": 4.087638171951401e-07, |
|
"logits/chosen": 6920.4697265625, |
|
"logits/rejected": 5017.2001953125, |
|
"logps/chosen": -218.02651977539062, |
|
"logps/rejected": -215.5140838623047, |
|
"loss": 1.9654, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -9.257742881774902, |
|
"rewards/margins": 43.26369857788086, |
|
"rewards/rejected": -52.52144241333008, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 9.738474609851922, |
|
"learning_rate": 4.034108149278543e-07, |
|
"logits/chosen": 7102.1494140625, |
|
"logits/rejected": 5567.33251953125, |
|
"logps/chosen": -262.5325622558594, |
|
"logps/rejected": -226.304931640625, |
|
"loss": 1.9647, |
|
"rewards/accuracies": 0.7166666388511658, |
|
"rewards/chosen": -12.100336074829102, |
|
"rewards/margins": 23.9965763092041, |
|
"rewards/rejected": -36.09691619873047, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.36891679748822603, |
|
"grad_norm": 16.944485993230334, |
|
"learning_rate": 3.979425312480629e-07, |
|
"logits/chosen": 6121.8251953125, |
|
"logits/rejected": 5385.27978515625, |
|
"logps/chosen": -226.22708129882812, |
|
"logps/rejected": -247.5476531982422, |
|
"loss": 1.961, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -13.962315559387207, |
|
"rewards/margins": 31.019052505493164, |
|
"rewards/rejected": -44.98136520385742, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 10.061154653659795, |
|
"learning_rate": 3.923630753280357e-07, |
|
"logits/chosen": 6582.51708984375, |
|
"logits/rejected": 5718.14208984375, |
|
"logps/chosen": -220.9097900390625, |
|
"logps/rejected": -222.513916015625, |
|
"loss": 1.9678, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": -13.514605522155762, |
|
"rewards/margins": 34.60355758666992, |
|
"rewards/rejected": -48.118160247802734, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 9.952729544567205, |
|
"learning_rate": 3.866766398810424e-07, |
|
"logits/chosen": 6168.904296875, |
|
"logits/rejected": 5932.04833984375, |
|
"logps/chosen": -181.2738494873047, |
|
"logps/rejected": -238.46578979492188, |
|
"loss": 1.9539, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -4.665351390838623, |
|
"rewards/margins": 31.776134490966797, |
|
"rewards/rejected": -36.44148635864258, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 10.886191142069716, |
|
"learning_rate": 3.8088749801071496e-07, |
|
"logits/chosen": 6751.1181640625, |
|
"logits/rejected": 5237.1748046875, |
|
"logps/chosen": -239.8447723388672, |
|
"logps/rejected": -258.9305725097656, |
|
"loss": 1.9645, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -17.89109230041504, |
|
"rewards/margins": 38.52326202392578, |
|
"rewards/rejected": -56.41435623168945, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4003139717425432, |
|
"grad_norm": 12.12571996214556, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 5390.6787109375, |
|
"logits/rejected": 4790.146484375, |
|
"logps/chosen": -199.76528930664062, |
|
"logps/rejected": -217.97457885742188, |
|
"loss": 1.9639, |
|
"rewards/accuracies": 0.8166666030883789, |
|
"rewards/chosen": -10.282281875610352, |
|
"rewards/margins": 41.32698440551758, |
|
"rewards/rejected": -51.60927200317383, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 9.519450563259452, |
|
"learning_rate": 3.6901857004211443e-07, |
|
"logits/chosen": 5713.3515625, |
|
"logits/rejected": 5326.962890625, |
|
"logps/chosen": -212.322998046875, |
|
"logps/rejected": -235.4215850830078, |
|
"loss": 1.9753, |
|
"rewards/accuracies": 0.6916667222976685, |
|
"rewards/chosen": -13.85698127746582, |
|
"rewards/margins": 25.776592254638672, |
|
"rewards/rejected": -39.63357925415039, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.41601255886970173, |
|
"grad_norm": 9.888104468321709, |
|
"learning_rate": 3.6294770291596076e-07, |
|
"logits/chosen": 6448.71337890625, |
|
"logits/rejected": 5332.93701171875, |
|
"logps/chosen": -219.8802947998047, |
|
"logps/rejected": -227.52792358398438, |
|
"loss": 1.9559, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -6.957555294036865, |
|
"rewards/margins": 21.810047149658203, |
|
"rewards/rejected": -28.767602920532227, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 32.92673172258904, |
|
"learning_rate": 3.5679196060850034e-07, |
|
"logits/chosen": 6134.5771484375, |
|
"logits/rejected": 5512.7548828125, |
|
"logps/chosen": -224.2685089111328, |
|
"logps/rejected": -235.88308715820312, |
|
"loss": 1.9648, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -13.450671195983887, |
|
"rewards/margins": 34.19359588623047, |
|
"rewards/rejected": -47.64427185058594, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4317111459968603, |
|
"grad_norm": 10.505039462285955, |
|
"learning_rate": 3.505559688866229e-07, |
|
"logits/chosen": 5928.0751953125, |
|
"logits/rejected": 5549.6640625, |
|
"logps/chosen": -229.20504760742188, |
|
"logps/rejected": -271.86517333984375, |
|
"loss": 1.9554, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -14.464628219604492, |
|
"rewards/margins": 32.33644485473633, |
|
"rewards/rejected": -46.80107498168945, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 10.28700128076687, |
|
"learning_rate": 3.4424441382108826e-07, |
|
"logits/chosen": 5985.54833984375, |
|
"logits/rejected": 5623.6748046875, |
|
"logps/chosen": -218.4139862060547, |
|
"logps/rejected": -238.92288208007812, |
|
"loss": 1.9712, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -15.138700485229492, |
|
"rewards/margins": 30.574970245361328, |
|
"rewards/rejected": -45.71367263793945, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4474097331240188, |
|
"grad_norm": 15.423527438178777, |
|
"learning_rate": 3.378620382651523e-07, |
|
"logits/chosen": 6308.0498046875, |
|
"logits/rejected": 5838.08056640625, |
|
"logps/chosen": -258.34332275390625, |
|
"logps/rejected": -273.0830383300781, |
|
"loss": 1.9588, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -15.38349437713623, |
|
"rewards/margins": 33.929012298583984, |
|
"rewards/rejected": -49.3125114440918, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 14.611083089156338, |
|
"learning_rate": 3.314136382905234e-07, |
|
"logits/chosen": 6261.64453125, |
|
"logits/rejected": 5689.3505859375, |
|
"logps/chosen": -222.60165405273438, |
|
"logps/rejected": -264.10699462890625, |
|
"loss": 1.9673, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -10.186196327209473, |
|
"rewards/margins": 40.609474182128906, |
|
"rewards/rejected": -50.79567337036133, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4631083202511774, |
|
"grad_norm": 14.179147587086423, |
|
"learning_rate": 3.249040595833274e-07, |
|
"logits/chosen": 6810.31884765625, |
|
"logits/rejected": 5779.87451171875, |
|
"logps/chosen": -244.0528564453125, |
|
"logps/rejected": -228.9059600830078, |
|
"loss": 1.9497, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -12.326987266540527, |
|
"rewards/margins": 37.720115661621094, |
|
"rewards/rejected": -50.0471076965332, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 19.155016516986336, |
|
"learning_rate": 3.1833819380279023e-07, |
|
"logits/chosen": 6452.5693359375, |
|
"logits/rejected": 5529.4580078125, |
|
"logps/chosen": -190.16168212890625, |
|
"logps/rejected": -239.14492797851562, |
|
"loss": 1.9536, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -12.613371849060059, |
|
"rewards/margins": 36.84527587890625, |
|
"rewards/rejected": -49.458648681640625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.478806907378336, |
|
"grad_norm": 13.224856834760333, |
|
"learning_rate": 3.11720974905373e-07, |
|
"logits/chosen": 6204.62841796875, |
|
"logits/rejected": 5447.55419921875, |
|
"logps/chosen": -216.75570678710938, |
|
"logps/rejected": -233.86032104492188, |
|
"loss": 1.9449, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -10.637582778930664, |
|
"rewards/margins": 37.80018997192383, |
|
"rewards/rejected": -48.437767028808594, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.48665620094191525, |
|
"grad_norm": 17.52069876117691, |
|
"learning_rate": 3.0505737543712275e-07, |
|
"logits/chosen": 5294.59375, |
|
"logits/rejected": 4387.63818359375, |
|
"logps/chosen": -196.4014129638672, |
|
"logps/rejected": -210.00869750976562, |
|
"loss": 1.9549, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -12.642759323120117, |
|
"rewards/margins": 36.267051696777344, |
|
"rewards/rejected": -48.909812927246094, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4945054945054945, |
|
"grad_norm": 33.05264787102616, |
|
"learning_rate": 2.9835240279702513e-07, |
|
"logits/chosen": 6885.4189453125, |
|
"logits/rejected": 5920.83056640625, |
|
"logps/chosen": -251.9607696533203, |
|
"logps/rejected": -256.430908203125, |
|
"loss": 1.9577, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -9.144782066345215, |
|
"rewards/margins": 53.038055419921875, |
|
"rewards/rejected": -62.182838439941406, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 13.271843782155607, |
|
"learning_rate": 2.9161109547416667e-07, |
|
"logits/chosen": 6548.42822265625, |
|
"logits/rejected": 5635.82861328125, |
|
"logps/chosen": -226.5624542236328, |
|
"logps/rejected": -251.34622192382812, |
|
"loss": 1.9503, |
|
"rewards/accuracies": 0.6916666626930237, |
|
"rewards/chosen": -15.800863265991211, |
|
"rewards/margins": 21.036413192749023, |
|
"rewards/rejected": -36.837284088134766, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 15.831220822034595, |
|
"learning_rate": 2.848385192615339e-07, |
|
"logits/chosen": 5647.466796875, |
|
"logits/rejected": 4650.84326171875, |
|
"logps/chosen": -208.1371612548828, |
|
"logps/rejected": -215.1590118408203, |
|
"loss": 1.9581, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -11.797808647155762, |
|
"rewards/margins": 34.633445739746094, |
|
"rewards/rejected": -46.431251525878906, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5180533751962323, |
|
"grad_norm": 11.97405793704744, |
|
"learning_rate": 2.780397634492949e-07, |
|
"logits/chosen": 6318.73193359375, |
|
"logits/rejected": 5105.6806640625, |
|
"logps/chosen": -228.73629760742188, |
|
"logps/rejected": -247.58203125, |
|
"loss": 1.9628, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -11.349790573120117, |
|
"rewards/margins": 48.284786224365234, |
|
"rewards/rejected": -59.63458251953125, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5259026687598116, |
|
"grad_norm": 15.694525160199404, |
|
"learning_rate": 2.71219937000424e-07, |
|
"logits/chosen": 6305.85546875, |
|
"logits/rejected": 5219.8154296875, |
|
"logps/chosen": -218.81637573242188, |
|
"logps/rejected": -233.28933715820312, |
|
"loss": 1.9636, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -12.056918144226074, |
|
"rewards/margins": 32.37800216674805, |
|
"rewards/rejected": -44.4349250793457, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 12.2450633413355, |
|
"learning_rate": 2.6438416471154273e-07, |
|
"logits/chosen": 6118.7060546875, |
|
"logits/rejected": 5147.07470703125, |
|
"logps/chosen": -223.75503540039062, |
|
"logps/rejected": -228.0189971923828, |
|
"loss": 1.9541, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -10.093757629394531, |
|
"rewards/margins": 38.698543548583984, |
|
"rewards/rejected": -48.79230499267578, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5416012558869702, |
|
"grad_norm": 13.662534533205077, |
|
"learning_rate": 2.5753758336186326e-07, |
|
"logits/chosen": 6057.73046875, |
|
"logits/rejected": 5582.77783203125, |
|
"logps/chosen": -225.1211700439453, |
|
"logps/rejected": -271.24847412109375, |
|
"loss": 1.9559, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -16.014719009399414, |
|
"rewards/margins": 39.03708267211914, |
|
"rewards/rejected": -55.051795959472656, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 20.150978172477505, |
|
"learning_rate": 2.5068533785312666e-07, |
|
"logits/chosen": 5760.0908203125, |
|
"logits/rejected": 5564.3642578125, |
|
"logps/chosen": -206.0763702392578, |
|
"logps/rejected": -243.00936889648438, |
|
"loss": 1.9826, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -16.816057205200195, |
|
"rewards/margins": 39.219337463378906, |
|
"rewards/rejected": -56.03539276123047, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5572998430141287, |
|
"grad_norm": 13.452123278177462, |
|
"learning_rate": 2.4383257734343794e-07, |
|
"logits/chosen": 5708.1435546875, |
|
"logits/rejected": 5760.40869140625, |
|
"logps/chosen": -205.77761840820312, |
|
"logps/rejected": -244.1310577392578, |
|
"loss": 1.956, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -11.866220474243164, |
|
"rewards/margins": 29.644180297851562, |
|
"rewards/rejected": -41.510398864746094, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 9.709598082836985, |
|
"learning_rate": 2.3698445137790258e-07, |
|
"logits/chosen": 6114.9091796875, |
|
"logits/rejected": 5311.08251953125, |
|
"logps/chosen": -227.6648406982422, |
|
"logps/rejected": -236.38330078125, |
|
"loss": 1.9662, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -8.66163158416748, |
|
"rewards/margins": 27.956518173217773, |
|
"rewards/rejected": -36.61814880371094, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5729984301412873, |
|
"grad_norm": 12.868770235170873, |
|
"learning_rate": 2.3014610601897157e-07, |
|
"logits/chosen": 6644.2060546875, |
|
"logits/rejected": 5136.859375, |
|
"logps/chosen": -238.3865966796875, |
|
"logps/rejected": -222.13784790039062, |
|
"loss": 1.9514, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -13.240676879882812, |
|
"rewards/margins": 33.82345199584961, |
|
"rewards/rejected": -47.06413650512695, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5808477237048666, |
|
"grad_norm": 15.41698862667205, |
|
"learning_rate": 2.2332267997940513e-07, |
|
"logits/chosen": 5527.283203125, |
|
"logits/rejected": 4719.14013671875, |
|
"logps/chosen": -203.3306427001953, |
|
"logps/rejected": -216.73959350585938, |
|
"loss": 1.9527, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -10.906192779541016, |
|
"rewards/margins": 43.1873664855957, |
|
"rewards/rejected": -54.09355545043945, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5886970172684458, |
|
"grad_norm": 13.318035652689497, |
|
"learning_rate": 2.1651930076075723e-07, |
|
"logits/chosen": 6021.62890625, |
|
"logits/rejected": 5484.7626953125, |
|
"logps/chosen": -193.93162536621094, |
|
"logps/rejected": -213.35128784179688, |
|
"loss": 1.9612, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -13.50567626953125, |
|
"rewards/margins": 32.751338958740234, |
|
"rewards/rejected": -46.25701141357422, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 17.96255248447627, |
|
"learning_rate": 2.0974108080028692e-07, |
|
"logits/chosen": 6310.85986328125, |
|
"logits/rejected": 5030.54736328125, |
|
"logps/chosen": -213.32278442382812, |
|
"logps/rejected": -214.3714599609375, |
|
"loss": 1.9636, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -10.071836471557617, |
|
"rewards/margins": 29.769107818603516, |
|
"rewards/rejected": -39.8409423828125, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6043956043956044, |
|
"grad_norm": 22.574869511207616, |
|
"learning_rate": 2.0299311362918773e-07, |
|
"logits/chosen": 6518.14404296875, |
|
"logits/rejected": 5643.26220703125, |
|
"logps/chosen": -245.22970581054688, |
|
"logps/rejected": -276.74285888671875, |
|
"loss": 1.9681, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -15.496047973632812, |
|
"rewards/margins": 37.38671112060547, |
|
"rewards/rejected": -52.88275909423828, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 31.521005261418978, |
|
"learning_rate": 1.962804700450265e-07, |
|
"logits/chosen": 6350.6767578125, |
|
"logits/rejected": 6074.388671875, |
|
"logps/chosen": -227.0453643798828, |
|
"logps/rejected": -277.8321838378906, |
|
"loss": 1.9662, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -10.940030097961426, |
|
"rewards/margins": 29.504467010498047, |
|
"rewards/rejected": -40.44449996948242, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6200941915227629, |
|
"grad_norm": 19.737346143715538, |
|
"learning_rate": 1.8960819430126334e-07, |
|
"logits/chosen": 5919.3037109375, |
|
"logits/rejected": 5263.5703125, |
|
"logps/chosen": -215.43441772460938, |
|
"logps/rejected": -249.7850799560547, |
|
"loss": 1.9556, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -17.30341148376465, |
|
"rewards/margins": 43.883941650390625, |
|
"rewards/rejected": -61.187347412109375, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 18.29038936836771, |
|
"learning_rate": 1.8298130031671972e-07, |
|
"logits/chosen": 5920.49365234375, |
|
"logits/rejected": 5215.0712890625, |
|
"logps/chosen": -233.1874542236328, |
|
"logps/rejected": -261.2085266113281, |
|
"loss": 1.9726, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -16.46460723876953, |
|
"rewards/margins": 32.04566192626953, |
|
"rewards/rejected": -48.5102653503418, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6357927786499215, |
|
"grad_norm": 12.137685601024057, |
|
"learning_rate": 1.7640476790784075e-07, |
|
"logits/chosen": 5467.83251953125, |
|
"logits/rejected": 4939.65087890625, |
|
"logps/chosen": -212.20767211914062, |
|
"logps/rejected": -267.3034973144531, |
|
"loss": 1.9569, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -9.752274513244629, |
|
"rewards/margins": 36.25393295288086, |
|
"rewards/rejected": -46.00621032714844, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6436420722135008, |
|
"grad_norm": 13.63770692303666, |
|
"learning_rate": 1.6988353904658492e-07, |
|
"logits/chosen": 5939.91552734375, |
|
"logits/rejected": 4631.35205078125, |
|
"logps/chosen": -229.2455291748047, |
|
"logps/rejected": -209.6826629638672, |
|
"loss": 1.9476, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -8.030985832214355, |
|
"rewards/margins": 32.963321685791016, |
|
"rewards/rejected": -40.99430465698242, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6514913657770801, |
|
"grad_norm": 35.3592513671855, |
|
"learning_rate": 1.634225141467513e-07, |
|
"logits/chosen": 5885.0146484375, |
|
"logits/rejected": 5291.1865234375, |
|
"logps/chosen": -221.3533172607422, |
|
"logps/rejected": -254.42501831054688, |
|
"loss": 1.9561, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -17.167299270629883, |
|
"rewards/margins": 44.80288314819336, |
|
"rewards/rejected": -61.97017288208008, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 10.761005849291488, |
|
"learning_rate": 1.570265483815364e-07, |
|
"logits/chosen": 6429.474609375, |
|
"logits/rejected": 5307.4501953125, |
|
"logps/chosen": -245.8719024658203, |
|
"logps/rejected": -261.5577087402344, |
|
"loss": 1.9591, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -18.54207420349121, |
|
"rewards/margins": 30.222482681274414, |
|
"rewards/rejected": -48.764556884765625, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6671899529042387, |
|
"grad_norm": 16.29666584290201, |
|
"learning_rate": 1.5070044803508691e-07, |
|
"logits/chosen": 5947.8916015625, |
|
"logits/rejected": 5381.05810546875, |
|
"logps/chosen": -227.6597900390625, |
|
"logps/rejected": -256.93426513671875, |
|
"loss": 1.9444, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -12.182321548461914, |
|
"rewards/margins": 44.29661560058594, |
|
"rewards/rejected": -56.47894287109375, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6750392464678179, |
|
"grad_norm": 14.229262245757536, |
|
"learning_rate": 1.444489668907914e-07, |
|
"logits/chosen": 6413.134765625, |
|
"logits/rejected": 5485.0712890625, |
|
"logps/chosen": -259.97039794921875, |
|
"logps/rejected": -254.68661499023438, |
|
"loss": 1.9651, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -12.199740409851074, |
|
"rewards/margins": 39.80502700805664, |
|
"rewards/rejected": -52.00476837158203, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6828885400313972, |
|
"grad_norm": 14.985139051030577, |
|
"learning_rate": 1.3827680265902232e-07, |
|
"logits/chosen": 6375.4150390625, |
|
"logits/rejected": 5318.18603515625, |
|
"logps/chosen": -241.5561065673828, |
|
"logps/rejected": -245.6100616455078, |
|
"loss": 1.9577, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -11.459368705749512, |
|
"rewards/margins": 33.461753845214844, |
|
"rewards/rejected": -44.92112731933594, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 21.012999888781128, |
|
"learning_rate": 1.3218859344701632e-07, |
|
"logits/chosen": 5609.716796875, |
|
"logits/rejected": 5388.8818359375, |
|
"logps/chosen": -221.8362579345703, |
|
"logps/rejected": -279.399658203125, |
|
"loss": 1.9613, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -11.325273513793945, |
|
"rewards/margins": 36.48521423339844, |
|
"rewards/rejected": -47.81048583984375, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6985871271585558, |
|
"grad_norm": 16.069902059846896, |
|
"learning_rate": 1.2618891427354172e-07, |
|
"logits/chosen": 6615.9248046875, |
|
"logits/rejected": 5418.08447265625, |
|
"logps/chosen": -268.32244873046875, |
|
"logps/rejected": -266.2985534667969, |
|
"loss": 1.9596, |
|
"rewards/accuracies": 0.7999999523162842, |
|
"rewards/chosen": -10.72636890411377, |
|
"rewards/margins": 43.791648864746094, |
|
"rewards/rejected": -54.51801681518555, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.706436420722135, |
|
"grad_norm": 14.83296947009343, |
|
"learning_rate": 1.202822736309758e-07, |
|
"logits/chosen": 5605.28662109375, |
|
"logits/rejected": 5226.92236328125, |
|
"logps/chosen": -214.8007049560547, |
|
"logps/rejected": -256.61541748046875, |
|
"loss": 1.9601, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -10.451518058776855, |
|
"rewards/margins": 35.73381805419922, |
|
"rewards/rejected": -46.185333251953125, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 16.04357761307859, |
|
"learning_rate": 1.1447311009737299e-07, |
|
"logits/chosen": 5512.8681640625, |
|
"logits/rejected": 5264.71142578125, |
|
"logps/chosen": -220.86276245117188, |
|
"logps/rejected": -260.4016418457031, |
|
"loss": 1.9546, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -12.636472702026367, |
|
"rewards/margins": 39.836666107177734, |
|
"rewards/rejected": -52.47313690185547, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 11.14000447509858, |
|
"learning_rate": 1.0876578900107053e-07, |
|
"logits/chosen": 6096.97314453125, |
|
"logits/rejected": 5090.04052734375, |
|
"logps/chosen": -245.5460968017578, |
|
"logps/rejected": -248.48770141601562, |
|
"loss": 1.9524, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -13.124479293823242, |
|
"rewards/margins": 37.220909118652344, |
|
"rewards/rejected": -50.34539031982422, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7299843014128728, |
|
"grad_norm": 15.02248631232516, |
|
"learning_rate": 1.0316459914033793e-07, |
|
"logits/chosen": 6012.04833984375, |
|
"logits/rejected": 4572.67333984375, |
|
"logps/chosen": -253.6759796142578, |
|
"logps/rejected": -244.62939453125, |
|
"loss": 1.9584, |
|
"rewards/accuracies": 0.7499999403953552, |
|
"rewards/chosen": -17.687047958374023, |
|
"rewards/margins": 41.01814651489258, |
|
"rewards/rejected": -58.70519256591797, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7378335949764521, |
|
"grad_norm": 14.78768614756301, |
|
"learning_rate": 9.767374956053584e-08, |
|
"logits/chosen": 5824.52392578125, |
|
"logits/rejected": 5130.29150390625, |
|
"logps/chosen": -231.50732421875, |
|
"logps/rejected": -265.863525390625, |
|
"loss": 1.9506, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -13.213656425476074, |
|
"rewards/margins": 47.669952392578125, |
|
"rewards/rejected": -60.88360595703125, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7456828885400314, |
|
"grad_norm": 22.1690955607781, |
|
"learning_rate": 9.229736639120561e-08, |
|
"logits/chosen": 5999.9892578125, |
|
"logits/rejected": 5568.5703125, |
|
"logps/chosen": -230.053955078125, |
|
"logps/rejected": -253.02633666992188, |
|
"loss": 1.9589, |
|
"rewards/accuracies": 0.7833333015441895, |
|
"rewards/chosen": -12.252751350402832, |
|
"rewards/margins": 26.61910057067871, |
|
"rewards/rejected": -38.871849060058594, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 17.03053961858823, |
|
"learning_rate": 8.70394897454659e-08, |
|
"logits/chosen": 5853.4912109375, |
|
"logits/rejected": 5239.51708984375, |
|
"logps/chosen": -227.2183380126953, |
|
"logps/rejected": -253.36898803710938, |
|
"loss": 1.9478, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": -8.789916038513184, |
|
"rewards/margins": 42.10274887084961, |
|
"rewards/rejected": -50.892677307128906, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7613814756671899, |
|
"grad_norm": 24.752863800000334, |
|
"learning_rate": 8.19040706840472e-08, |
|
"logits/chosen": 5959.28515625, |
|
"logits/rejected": 5025.02099609375, |
|
"logps/chosen": -251.1478729248047, |
|
"logps/rejected": -268.1574401855469, |
|
"loss": 1.9401, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -10.927900314331055, |
|
"rewards/margins": 50.53899383544922, |
|
"rewards/rejected": -61.46689987182617, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 16.610566886947357, |
|
"learning_rate": 7.689496824624525e-08, |
|
"logits/chosen": 5662.68359375, |
|
"logits/rejected": 4593.669921875, |
|
"logps/chosen": -239.620849609375, |
|
"logps/rejected": -265.27178955078125, |
|
"loss": 1.9424, |
|
"rewards/accuracies": 0.8083332777023315, |
|
"rewards/chosen": -10.80424690246582, |
|
"rewards/margins": 63.973960876464844, |
|
"rewards/rejected": -74.77821350097656, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7770800627943485, |
|
"grad_norm": 36.937457019710536, |
|
"learning_rate": 7.201594655002458e-08, |
|
"logits/chosen": 5990.30712890625, |
|
"logits/rejected": 5043.2490234375, |
|
"logps/chosen": -240.4677734375, |
|
"logps/rejected": -260.95574951171875, |
|
"loss": 1.9499, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -14.572827339172363, |
|
"rewards/margins": 52.31321334838867, |
|
"rewards/rejected": -66.88603973388672, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 23.03062466270963, |
|
"learning_rate": 6.727067196345099e-08, |
|
"logits/chosen": 5678.6806640625, |
|
"logits/rejected": 4839.29931640625, |
|
"logps/chosen": -227.02432250976562, |
|
"logps/rejected": -228.1048126220703, |
|
"loss": 1.9563, |
|
"rewards/accuracies": 0.6916666626930237, |
|
"rewards/chosen": -13.268468856811523, |
|
"rewards/margins": 34.491050720214844, |
|
"rewards/rejected": -47.759521484375, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.792778649921507, |
|
"grad_norm": 14.87512248096038, |
|
"learning_rate": 6.26627103495786e-08, |
|
"logits/chosen": 5860.2373046875, |
|
"logits/rejected": 4920.7861328125, |
|
"logps/chosen": -224.3702850341797, |
|
"logps/rejected": -244.62551879882812, |
|
"loss": 1.9538, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -9.661078453063965, |
|
"rewards/margins": 43.53081512451172, |
|
"rewards/rejected": -53.19189453125, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8006279434850864, |
|
"grad_norm": 14.748532824208521, |
|
"learning_rate": 5.8195524386862374e-08, |
|
"logits/chosen": 5948.0859375, |
|
"logits/rejected": 5325.62939453125, |
|
"logps/chosen": -256.8372802734375, |
|
"logps/rejected": -279.39373779296875, |
|
"loss": 1.9608, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": -8.658674240112305, |
|
"rewards/margins": 44.65437316894531, |
|
"rewards/rejected": -53.31304931640625, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8084772370486656, |
|
"grad_norm": 13.111496123331063, |
|
"learning_rate": 5.38724709671092e-08, |
|
"logits/chosen": 6350.22802734375, |
|
"logits/rejected": 6027.82177734375, |
|
"logps/chosen": -243.10403442382812, |
|
"logps/rejected": -286.2555847167969, |
|
"loss": 1.95, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -11.635639190673828, |
|
"rewards/margins": 41.500667572021484, |
|
"rewards/rejected": -53.13630294799805, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 13.785799458642654, |
|
"learning_rate": 4.969679867292276e-08, |
|
"logits/chosen": 5649.2197265625, |
|
"logits/rejected": 5181.4765625, |
|
"logps/chosen": -237.51126098632812, |
|
"logps/rejected": -271.2793273925781, |
|
"loss": 1.95, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -17.409549713134766, |
|
"rewards/margins": 44.685813903808594, |
|
"rewards/rejected": -62.095359802246094, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8241758241758241, |
|
"grad_norm": 18.785044476818584, |
|
"learning_rate": 4.5671645336537416e-08, |
|
"logits/chosen": 5704.8623046875, |
|
"logits/rejected": 5225.52587890625, |
|
"logps/chosen": -251.41500854492188, |
|
"logps/rejected": -279.2231750488281, |
|
"loss": 1.9535, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -13.934377670288086, |
|
"rewards/margins": 49.494422912597656, |
|
"rewards/rejected": -63.428802490234375, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8320251177394035, |
|
"grad_norm": 26.490352891661313, |
|
"learning_rate": 4.180003568187776e-08, |
|
"logits/chosen": 7039.8828125, |
|
"logits/rejected": 5571.68212890625, |
|
"logps/chosen": -277.3167419433594, |
|
"logps/rejected": -269.80999755859375, |
|
"loss": 1.9639, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -16.329357147216797, |
|
"rewards/margins": 33.44147491455078, |
|
"rewards/rejected": -49.770835876464844, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8398744113029827, |
|
"grad_norm": 22.94047462012029, |
|
"learning_rate": 3.8084879051612144e-08, |
|
"logits/chosen": 5870.64013671875, |
|
"logits/rejected": 5412.8837890625, |
|
"logps/chosen": -234.2925262451172, |
|
"logps/rejected": -246.66885375976562, |
|
"loss": 1.953, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -9.04952335357666, |
|
"rewards/margins": 44.45494842529297, |
|
"rewards/rejected": -53.50446701049805, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 15.970389728332213, |
|
"learning_rate": 3.452896722091128e-08, |
|
"logits/chosen": 6428.0244140625, |
|
"logits/rejected": 5009.7216796875, |
|
"logps/chosen": -274.9081726074219, |
|
"logps/rejected": -264.4786376953125, |
|
"loss": 1.9422, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": -8.338837623596191, |
|
"rewards/margins": 55.159873962402344, |
|
"rewards/rejected": -63.49870681762695, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8555729984301413, |
|
"grad_norm": 18.198254677065798, |
|
"learning_rate": 3.11349722995527e-08, |
|
"logits/chosen": 6511.92822265625, |
|
"logits/rejected": 4918.216796875, |
|
"logps/chosen": -242.1095733642578, |
|
"logps/rejected": -269.7752990722656, |
|
"loss": 1.9586, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -14.634966850280762, |
|
"rewards/margins": 42.25577926635742, |
|
"rewards/rejected": -56.8907470703125, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8634222919937206, |
|
"grad_norm": 16.804338867644905, |
|
"learning_rate": 2.7905444723949762e-08, |
|
"logits/chosen": 6285.3818359375, |
|
"logits/rejected": 5230.830078125, |
|
"logps/chosen": -252.6708526611328, |
|
"logps/rejected": -250.3868408203125, |
|
"loss": 1.9605, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -14.134493827819824, |
|
"rewards/margins": 46.755001068115234, |
|
"rewards/rejected": -60.889495849609375, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8712715855572999, |
|
"grad_norm": 16.045778309961047, |
|
"learning_rate": 2.484281134061142e-08, |
|
"logits/chosen": 6644.92041015625, |
|
"logits/rejected": 5396.99462890625, |
|
"logps/chosen": -279.4850158691406, |
|
"logps/rejected": -281.66839599609375, |
|
"loss": 1.9534, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -14.871435165405273, |
|
"rewards/margins": 44.52893829345703, |
|
"rewards/rejected": -59.40037155151367, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 25.938670718923635, |
|
"learning_rate": 2.194937358247506e-08, |
|
"logits/chosen": 6502.3291015625, |
|
"logits/rejected": 5317.71044921875, |
|
"logps/chosen": -260.4866638183594, |
|
"logps/rejected": -276.68133544921875, |
|
"loss": 1.9533, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -15.346229553222656, |
|
"rewards/margins": 44.5643310546875, |
|
"rewards/rejected": -59.910552978515625, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8869701726844584, |
|
"grad_norm": 24.916434094111068, |
|
"learning_rate": 1.9227305739481612e-08, |
|
"logits/chosen": 5917.8349609375, |
|
"logits/rejected": 4701.77978515625, |
|
"logps/chosen": -244.61825561523438, |
|
"logps/rejected": -235.84878540039062, |
|
"loss": 1.9449, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -9.836224555969238, |
|
"rewards/margins": 45.78696823120117, |
|
"rewards/rejected": -55.623199462890625, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8948194662480377, |
|
"grad_norm": 14.382726209860957, |
|
"learning_rate": 1.6678653324693787e-08, |
|
"logits/chosen": 6504.939453125, |
|
"logits/rejected": 5327.1845703125, |
|
"logps/chosen": -270.0257873535156, |
|
"logps/rejected": -271.5950622558594, |
|
"loss": 1.9525, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -12.405012130737305, |
|
"rewards/margins": 38.37703323364258, |
|
"rewards/rejected": -50.78205108642578, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.902668759811617, |
|
"grad_norm": 14.78835811924378, |
|
"learning_rate": 1.4305331537183384e-08, |
|
"logits/chosen": 5756.44970703125, |
|
"logits/rejected": 5324.578125, |
|
"logps/chosen": -238.2313690185547, |
|
"logps/rejected": -266.89166259765625, |
|
"loss": 1.9446, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -11.247991561889648, |
|
"rewards/margins": 36.155738830566406, |
|
"rewards/rejected": -47.40373229980469, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 23.21802503519393, |
|
"learning_rate": 1.2109123822844653e-08, |
|
"logits/chosen": 5926.2421875, |
|
"logits/rejected": 4742.0224609375, |
|
"logps/chosen": -244.4608917236328, |
|
"logps/rejected": -248.4634552001953, |
|
"loss": 1.9544, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -15.904090881347656, |
|
"rewards/margins": 36.621253967285156, |
|
"rewards/rejected": -52.52534103393555, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9183673469387755, |
|
"grad_norm": 22.422532860536233, |
|
"learning_rate": 1.0091680534213387e-08, |
|
"logits/chosen": 6495.7001953125, |
|
"logits/rejected": 6269.51904296875, |
|
"logps/chosen": -256.6058654785156, |
|
"logps/rejected": -297.3378601074219, |
|
"loss": 1.9577, |
|
"rewards/accuracies": 0.7499999403953552, |
|
"rewards/chosen": -13.657504081726074, |
|
"rewards/margins": 36.93338394165039, |
|
"rewards/rejected": -50.59088897705078, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9262166405023547, |
|
"grad_norm": 19.603883456580654, |
|
"learning_rate": 8.254517690300944e-09, |
|
"logits/chosen": 5720.74365234375, |
|
"logits/rejected": 5222.8046875, |
|
"logps/chosen": -253.0874786376953, |
|
"logps/rejected": -268.9738464355469, |
|
"loss": 1.9593, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -13.141489028930664, |
|
"rewards/margins": 39.01056671142578, |
|
"rewards/rejected": -52.152061462402344, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9340659340659341, |
|
"grad_norm": 17.033230362077003, |
|
"learning_rate": 6.599015837372907e-09, |
|
"logits/chosen": 6205.9697265625, |
|
"logits/rejected": 5447.80859375, |
|
"logps/chosen": -269.47576904296875, |
|
"logps/rejected": -276.09552001953125, |
|
"loss": 1.949, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": -20.590267181396484, |
|
"rewards/margins": 38.352821350097656, |
|
"rewards/rejected": -58.943084716796875, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 16.90413077812471, |
|
"learning_rate": 5.126419011529992e-09, |
|
"logits/chosen": 6419.7333984375, |
|
"logits/rejected": 5498.9658203125, |
|
"logps/chosen": -267.5433044433594, |
|
"logps/rejected": -279.40057373046875, |
|
"loss": 1.9507, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -11.881638526916504, |
|
"rewards/margins": 48.85334014892578, |
|
"rewards/rejected": -60.7349853515625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9497645211930926, |
|
"grad_norm": 36.17870464463534, |
|
"learning_rate": 3.837833803870177e-09, |
|
"logits/chosen": 6007.43603515625, |
|
"logits/rejected": 5288.4658203125, |
|
"logps/chosen": -252.74148559570312, |
|
"logps/rejected": -278.2850646972656, |
|
"loss": 1.9559, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -12.555973052978516, |
|
"rewards/margins": 47.52782440185547, |
|
"rewards/rejected": -60.08379364013672, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.957613814756672, |
|
"grad_norm": 16.61938883168565, |
|
"learning_rate": 2.734228528934679e-09, |
|
"logits/chosen": 7483.11962890625, |
|
"logits/rejected": 5542.7509765625, |
|
"logps/chosen": -313.7238464355469, |
|
"logps/rejected": -300.87921142578125, |
|
"loss": 1.9611, |
|
"rewards/accuracies": 0.6833333969116211, |
|
"rewards/chosen": -16.955432891845703, |
|
"rewards/margins": 38.72659683227539, |
|
"rewards/rejected": -55.682029724121094, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9654631083202512, |
|
"grad_norm": 16.700922559123722, |
|
"learning_rate": 1.8164324970625645e-09, |
|
"logits/chosen": 6656.83984375, |
|
"logits/rejected": 5284.20751953125, |
|
"logps/chosen": -271.3492431640625, |
|
"logps/rejected": -268.39617919921875, |
|
"loss": 1.9547, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -10.767581939697266, |
|
"rewards/margins": 44.63188171386719, |
|
"rewards/rejected": -55.39946746826172, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 14.035772074810767, |
|
"learning_rate": 1.0851353912008642e-09, |
|
"logits/chosen": 5738.1884765625, |
|
"logits/rejected": 5294.599609375, |
|
"logps/chosen": -250.1355438232422, |
|
"logps/rejected": -292.23736572265625, |
|
"loss": 1.9484, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -18.523162841796875, |
|
"rewards/margins": 38.556339263916016, |
|
"rewards/rejected": -57.079505920410156, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9811616954474097, |
|
"grad_norm": 14.793452380633976, |
|
"learning_rate": 5.408867486384471e-10, |
|
"logits/chosen": 5854.9208984375, |
|
"logits/rejected": 4967.2197265625, |
|
"logps/chosen": -241.21725463867188, |
|
"logps/rejected": -235.61495971679688, |
|
"loss": 1.9573, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -10.9091215133667, |
|
"rewards/margins": 35.27557373046875, |
|
"rewards/rejected": -46.184696197509766, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 16.612160087002778, |
|
"learning_rate": 1.840955480532924e-10, |
|
"logits/chosen": 5532.6669921875, |
|
"logits/rejected": 5269.6572265625, |
|
"logps/chosen": -246.49185180664062, |
|
"logps/rejected": -263.04144287109375, |
|
"loss": 1.9554, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": -14.331242561340332, |
|
"rewards/margins": 32.38512420654297, |
|
"rewards/rejected": -46.71636962890625, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9968602825745683, |
|
"grad_norm": 21.26616020403944, |
|
"learning_rate": 1.502990218302247e-11, |
|
"logits/chosen": 5808.15283203125, |
|
"logits/rejected": 4749.75537109375, |
|
"logps/chosen": -237.1238555908203, |
|
"logps/rejected": -240.21200561523438, |
|
"loss": 1.9474, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -12.649885177612305, |
|
"rewards/margins": 41.257911682128906, |
|
"rewards/rejected": -53.907798767089844, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1274, |
|
"total_flos": 0.0, |
|
"train_loss": 1.9640779633724146, |
|
"train_runtime": 14760.7646, |
|
"train_samples_per_second": 4.142, |
|
"train_steps_per_second": 0.086 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1274, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|