|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 80000000000, |
|
"global_step": 1676, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 51.25, |
|
"learning_rate": 2.9761904761904764e-08, |
|
"logits/chosen": 1.0806633234024048, |
|
"logits/rejected": 1.027681589126587, |
|
"logps/chosen": -234.4534149169922, |
|
"logps/rejected": -543.556884765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": 0.0, |
|
"rewards/student_margin": 0.0, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 51.0, |
|
"learning_rate": 2.9761904761904765e-07, |
|
"logits/chosen": 0.9768059849739075, |
|
"logits/rejected": 0.994023323059082, |
|
"logps/chosen": -286.24139404296875, |
|
"logps/rejected": -447.72210693359375, |
|
"loss": 0.6975, |
|
"rewards/accuracies": 0.5925925970077515, |
|
"rewards/chosen": 0.09765356779098511, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -0.0830247700214386, |
|
"rewards/student_margin": 0.1806783229112625, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 47.25, |
|
"learning_rate": 5.952380952380953e-07, |
|
"logits/chosen": 0.7909770011901855, |
|
"logits/rejected": 0.8625937700271606, |
|
"logps/chosen": -246.6891326904297, |
|
"logps/rejected": -391.81671142578125, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.0017502456903457642, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -0.08085530996322632, |
|
"rewards/student_margin": 0.07910505682229996, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 50.25, |
|
"learning_rate": 8.928571428571429e-07, |
|
"logits/chosen": 0.804156482219696, |
|
"logits/rejected": 0.9018067121505737, |
|
"logps/chosen": -266.2332458496094, |
|
"logps/rejected": -457.662109375, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.46666669845581055, |
|
"rewards/chosen": -0.014444669708609581, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -0.11011429131031036, |
|
"rewards/student_margin": 0.09566961228847504, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 36.75, |
|
"learning_rate": 1.1904761904761906e-06, |
|
"logits/chosen": 1.0210270881652832, |
|
"logits/rejected": 0.9873097538948059, |
|
"logps/chosen": -277.4912109375, |
|
"logps/rejected": -495.9727478027344, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11192125082015991, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -0.7635596990585327, |
|
"rewards/student_margin": 0.651638388633728, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 34.5, |
|
"learning_rate": 1.4880952380952381e-06, |
|
"logits/chosen": 0.935893714427948, |
|
"logits/rejected": 0.9109848737716675, |
|
"logps/chosen": -277.14019775390625, |
|
"logps/rejected": -438.00811767578125, |
|
"loss": 0.3993, |
|
"rewards/accuracies": 0.8333331942558289, |
|
"rewards/chosen": -0.0486629493534565, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -0.8220487833023071, |
|
"rewards/student_margin": 0.7733858227729797, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 23.625, |
|
"learning_rate": 1.7857142857142859e-06, |
|
"logits/chosen": 0.8492110371589661, |
|
"logits/rejected": 1.239457130432129, |
|
"logps/chosen": -279.10772705078125, |
|
"logps/rejected": -522.7730102539062, |
|
"loss": 0.3142, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.2660249173641205, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -1.5817127227783203, |
|
"rewards/student_margin": 1.8477375507354736, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.0, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": 1.1166276931762695, |
|
"logits/rejected": 0.8739174008369446, |
|
"logps/chosen": -297.13970947265625, |
|
"logps/rejected": -374.25396728515625, |
|
"loss": 0.2381, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.5280119776725769, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -1.6486384868621826, |
|
"rewards/student_margin": 2.1766505241394043, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 28.625, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": 0.9739519953727722, |
|
"logits/rejected": 0.9922908544540405, |
|
"logps/chosen": -282.2953186035156, |
|
"logps/rejected": -461.89306640625, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.6085111498832703, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -2.7903707027435303, |
|
"rewards/student_margin": 3.3988823890686035, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 23.375, |
|
"learning_rate": 2.6785714285714285e-06, |
|
"logits/chosen": 0.7982707023620605, |
|
"logits/rejected": 0.9940367937088013, |
|
"logps/chosen": -283.6894836425781, |
|
"logps/rejected": -495.60546875, |
|
"loss": 0.2543, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.49286383390426636, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -3.6416537761688232, |
|
"rewards/student_margin": 4.134517669677734, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 23.375, |
|
"learning_rate": 2.9761904761904763e-06, |
|
"logits/chosen": 0.8860418200492859, |
|
"logits/rejected": 0.9776609539985657, |
|
"logps/chosen": -239.5576629638672, |
|
"logps/rejected": -414.81072998046875, |
|
"loss": 0.2526, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": 1.0381144285202026, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -2.6516735553741455, |
|
"rewards/student_margin": 3.6897876262664795, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 16.875, |
|
"learning_rate": 3.273809523809524e-06, |
|
"logits/chosen": 0.9802883267402649, |
|
"logits/rejected": 0.940815269947052, |
|
"logps/chosen": -284.52581787109375, |
|
"logps/rejected": -423.5133361816406, |
|
"loss": 0.2444, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.42794662714004517, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -3.9260330200195312, |
|
"rewards/student_margin": 4.35398006439209, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 12.25, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"logits/chosen": 1.0117547512054443, |
|
"logits/rejected": 0.9276782274246216, |
|
"logps/chosen": -289.87164306640625, |
|
"logps/rejected": -467.48779296875, |
|
"loss": 0.1975, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.7855817675590515, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.403926849365234, |
|
"rewards/student_margin": 5.189507961273193, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 3.869047619047619e-06, |
|
"logits/chosen": 0.8812491297721863, |
|
"logits/rejected": 1.1077783107757568, |
|
"logps/chosen": -244.08840942382812, |
|
"logps/rejected": -470.8115234375, |
|
"loss": 0.1819, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 0.22790038585662842, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -3.922947645187378, |
|
"rewards/student_margin": 4.150847911834717, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 18.625, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": 0.9511512517929077, |
|
"logits/rejected": 1.0154036283493042, |
|
"logps/chosen": -263.3562927246094, |
|
"logps/rejected": -441.83441162109375, |
|
"loss": 0.1941, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.22430308163166046, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -3.9950404167175293, |
|
"rewards/student_margin": 4.219343185424805, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 25.0, |
|
"learning_rate": 4.464285714285715e-06, |
|
"logits/chosen": 0.7221236228942871, |
|
"logits/rejected": 0.9142163991928101, |
|
"logps/chosen": -262.82550048828125, |
|
"logps/rejected": -474.90301513671875, |
|
"loss": 0.1981, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.27112048864364624, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.031813621520996, |
|
"rewards/student_margin": 5.302934646606445, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 21.25, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": 0.971523642539978, |
|
"logits/rejected": 0.9104828834533691, |
|
"logps/chosen": -313.972412109375, |
|
"logps/rejected": -449.889404296875, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.7551168203353882, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.720338821411133, |
|
"rewards/student_margin": 5.475455284118652, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 4.999978299664999e-06, |
|
"logits/chosen": 0.7932542562484741, |
|
"logits/rejected": 0.9889429807662964, |
|
"logps/chosen": -282.11749267578125, |
|
"logps/rejected": -486.21661376953125, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.047555923461914, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.129283905029297, |
|
"rewards/student_margin": 5.176839351654053, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 37.0, |
|
"learning_rate": 4.999218827495206e-06, |
|
"logits/chosen": 1.1726105213165283, |
|
"logits/rejected": 0.7503201365470886, |
|
"logps/chosen": -298.88177490234375, |
|
"logps/rejected": -376.23724365234375, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.2353880405426025, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -3.8243496417999268, |
|
"rewards/student_margin": 5.059737682342529, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 20.75, |
|
"learning_rate": 4.99737471526959e-06, |
|
"logits/chosen": 0.9437912106513977, |
|
"logits/rejected": 0.8459140062332153, |
|
"logps/chosen": -272.1702880859375, |
|
"logps/rejected": -441.1572265625, |
|
"loss": 0.1629, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.4726988673210144, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.565393447875977, |
|
"rewards/student_margin": 5.038092136383057, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 4.99444676331742e-06, |
|
"logits/chosen": 0.8719295263290405, |
|
"logits/rejected": 0.8616917729377747, |
|
"logps/chosen": -259.5501403808594, |
|
"logps/rejected": -482.0235900878906, |
|
"loss": 0.147, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.0355851650238037, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.009357452392578, |
|
"rewards/student_margin": 6.044942855834961, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 45.0, |
|
"learning_rate": 4.990436242345344e-06, |
|
"logits/chosen": 0.7471588253974915, |
|
"logits/rejected": 0.7325921058654785, |
|
"logps/chosen": -276.40740966796875, |
|
"logps/rejected": -429.87298583984375, |
|
"loss": 0.1675, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.49575358629226685, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.393766403198242, |
|
"rewards/student_margin": 5.889519691467285, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 15.8125, |
|
"learning_rate": 4.985344892885899e-06, |
|
"logits/chosen": 0.8454818725585938, |
|
"logits/rejected": 0.8547420501708984, |
|
"logps/chosen": -312.19378662109375, |
|
"logps/rejected": -419.61456298828125, |
|
"loss": 0.2657, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -0.12324689328670502, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.782127857208252, |
|
"rewards/student_margin": 4.658881187438965, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.125, |
|
"learning_rate": 4.979174924542143e-06, |
|
"logits/chosen": 0.8820652961730957, |
|
"logits/rejected": 0.8253690600395203, |
|
"logps/chosen": -273.909912109375, |
|
"logps/rejected": -413.5152893066406, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 0.9943733215332031, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.524769306182861, |
|
"rewards/student_margin": 5.519143104553223, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 22.375, |
|
"learning_rate": 4.971929015028702e-06, |
|
"logits/chosen": 0.8912525177001953, |
|
"logits/rejected": 0.8689953088760376, |
|
"logps/chosen": -279.77288818359375, |
|
"logps/rejected": -484.94989013671875, |
|
"loss": 0.2321, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.6567622423171997, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.584259986877441, |
|
"rewards/student_margin": 6.24102258682251, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 50.25, |
|
"learning_rate": 4.963610309009666e-06, |
|
"logits/chosen": 0.8293606638908386, |
|
"logits/rejected": 1.0634894371032715, |
|
"logps/chosen": -265.6617126464844, |
|
"logps/rejected": -453.5665588378906, |
|
"loss": 0.243, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": 0.2059473693370819, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.642806053161621, |
|
"rewards/student_margin": 5.848753929138184, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 4.9542224167338295e-06, |
|
"logits/chosen": 1.0618178844451904, |
|
"logits/rejected": 1.0457861423492432, |
|
"logps/chosen": -300.09442138671875, |
|
"logps/rejected": -466.44940185546875, |
|
"loss": 0.1425, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 0.8752765655517578, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.504293441772461, |
|
"rewards/student_margin": 5.379570007324219, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 19.75, |
|
"learning_rate": 4.943769412467875e-06, |
|
"logits/chosen": 0.7705072164535522, |
|
"logits/rejected": 0.7112764120101929, |
|
"logps/chosen": -269.6720886230469, |
|
"logps/rejected": -412.3421936035156, |
|
"loss": 0.1998, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.1547017097473145, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.43593692779541, |
|
"rewards/student_margin": 5.590639114379883, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 26.125, |
|
"learning_rate": 4.932255832728177e-06, |
|
"logits/chosen": 0.86072838306427, |
|
"logits/rejected": 0.8093738555908203, |
|
"logps/chosen": -244.2378692626953, |
|
"logps/rejected": -412.3228454589844, |
|
"loss": 0.1842, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.6512540578842163, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.44444465637207, |
|
"rewards/student_margin": 6.095698356628418, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 4.919686674311988e-06, |
|
"logits/chosen": 0.850325882434845, |
|
"logits/rejected": 1.0066044330596924, |
|
"logps/chosen": -254.3390350341797, |
|
"logps/rejected": -435.7002868652344, |
|
"loss": 0.2592, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.8666278123855591, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.611154556274414, |
|
"rewards/student_margin": 6.477782249450684, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 4.906067392128872e-06, |
|
"logits/chosen": 0.6758413314819336, |
|
"logits/rejected": 0.6797875761985779, |
|
"logps/chosen": -257.7704772949219, |
|
"logps/rejected": -373.86798095703125, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08449643850326538, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.3420000076293945, |
|
"rewards/student_margin": 4.4264960289001465, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 4.89140389683332e-06, |
|
"logits/chosen": 0.8578267097473145, |
|
"logits/rejected": 0.9068147540092468, |
|
"logps/chosen": -265.58837890625, |
|
"logps/rejected": -467.61907958984375, |
|
"loss": 0.1799, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.2945629358291626, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.4947590827941895, |
|
"rewards/student_margin": 6.7893218994140625, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 47.5, |
|
"learning_rate": 4.8757025522595755e-06, |
|
"logits/chosen": 0.6410337090492249, |
|
"logits/rejected": 0.9101980328559875, |
|
"logps/chosen": -253.18478393554688, |
|
"logps/rejected": -446.0069274902344, |
|
"loss": 0.143, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.7156537771224976, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.835994720458984, |
|
"rewards/student_margin": 5.5516486167907715, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 50.5, |
|
"learning_rate": 4.858970172659785e-06, |
|
"logits/chosen": 0.8477473258972168, |
|
"logits/rejected": 0.8235591650009155, |
|
"logps/chosen": -273.240478515625, |
|
"logps/rejected": -500.45135498046875, |
|
"loss": 0.3059, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.6796804666519165, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.122203350067139, |
|
"rewards/student_margin": 6.801883697509766, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 18.625, |
|
"learning_rate": 4.8412140197466626e-06, |
|
"logits/chosen": 1.056730031967163, |
|
"logits/rejected": 0.7656959295272827, |
|
"logps/chosen": -295.1404113769531, |
|
"logps/rejected": -414.6414489746094, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.1913378238677979, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.893978118896484, |
|
"rewards/student_margin": 6.085315227508545, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 24.625, |
|
"learning_rate": 4.822441799541979e-06, |
|
"logits/chosen": 0.6411978006362915, |
|
"logits/rejected": 1.0087072849273682, |
|
"logps/chosen": -234.6625518798828, |
|
"logps/rejected": -497.6658630371094, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.9057310819625854, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.054079532623291, |
|
"rewards/student_margin": 5.959811210632324, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 19.75, |
|
"learning_rate": 4.802661659032201e-06, |
|
"logits/chosen": 0.9185107946395874, |
|
"logits/rejected": 0.8089292645454407, |
|
"logps/chosen": -299.4004211425781, |
|
"logps/rejected": -413.39422607421875, |
|
"loss": 0.167, |
|
"rewards/accuracies": 0.9000000953674316, |
|
"rewards/chosen": 1.0747997760772705, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.140660285949707, |
|
"rewards/student_margin": 6.215460777282715, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 14.1875, |
|
"learning_rate": 4.781882182632777e-06, |
|
"logits/chosen": 0.9023804664611816, |
|
"logits/rejected": 0.8916164636611938, |
|
"logps/chosen": -255.41909790039062, |
|
"logps/rejected": -427.27880859375, |
|
"loss": 0.2058, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.0671674013137817, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.464870452880859, |
|
"rewards/student_margin": 5.532038688659668, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 19.5, |
|
"learning_rate": 4.760112388462565e-06, |
|
"logits/chosen": 1.047695517539978, |
|
"logits/rejected": 1.1817049980163574, |
|
"logps/chosen": -274.990234375, |
|
"logps/rejected": -495.79705810546875, |
|
"loss": 0.2418, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.7795820236206055, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.480805397033691, |
|
"rewards/student_margin": 6.260386943817139, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 15.0625, |
|
"learning_rate": 4.737361724430048e-06, |
|
"logits/chosen": 1.0343639850616455, |
|
"logits/rejected": 0.9025303721427917, |
|
"logps/chosen": -284.13348388671875, |
|
"logps/rejected": -400.54278564453125, |
|
"loss": 0.1948, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.2158838510513306, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -3.840426206588745, |
|
"rewards/student_margin": 5.056310176849365, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 29.75, |
|
"learning_rate": 4.7136400641330245e-06, |
|
"logits/chosen": 0.9151691198348999, |
|
"logits/rejected": 1.0338377952575684, |
|
"logps/chosen": -295.9420166015625, |
|
"logps/rejected": -489.6417541503906, |
|
"loss": 0.2565, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": 0.1835143268108368, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.181533336639404, |
|
"rewards/student_margin": 5.365047931671143, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 31.125, |
|
"learning_rate": 4.6889577025735425e-06, |
|
"logits/chosen": 0.7703173756599426, |
|
"logits/rejected": 0.809980034828186, |
|
"logps/chosen": -285.85540771484375, |
|
"logps/rejected": -461.5099182128906, |
|
"loss": 0.2758, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.4834997653961182, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.809512138366699, |
|
"rewards/student_margin": 6.293011665344238, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.625, |
|
"learning_rate": 4.663325351689957e-06, |
|
"logits/chosen": 0.7997140884399414, |
|
"logits/rejected": 1.1446664333343506, |
|
"logps/chosen": -272.6542053222656, |
|
"logps/rejected": -460.8824768066406, |
|
"loss": 0.2058, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 0.7220415472984314, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -3.998138427734375, |
|
"rewards/student_margin": 4.720179557800293, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 11.5, |
|
"learning_rate": 4.636754135708041e-06, |
|
"logits/chosen": 0.592836856842041, |
|
"logits/rejected": 0.828029453754425, |
|
"logps/chosen": -214.758544921875, |
|
"logps/rejected": -429.4766540527344, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.9377529621124268, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.47792387008667, |
|
"rewards/student_margin": 6.415676116943359, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 25.5, |
|
"learning_rate": 4.609255586313166e-06, |
|
"logits/chosen": 0.9192422032356262, |
|
"logits/rejected": 0.8996514081954956, |
|
"logps/chosen": -300.15411376953125, |
|
"logps/rejected": -433.31414794921875, |
|
"loss": 0.1421, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.9012549519538879, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.594986438751221, |
|
"rewards/student_margin": 5.496241569519043, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 17.375, |
|
"learning_rate": 4.580841637645638e-06, |
|
"logits/chosen": 0.83323734998703, |
|
"logits/rejected": 0.9192069172859192, |
|
"logps/chosen": -313.2845458984375, |
|
"logps/rejected": -505.2017517089844, |
|
"loss": 0.2334, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.4484587609767914, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.4181952476501465, |
|
"rewards/student_margin": 5.866654396057129, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 41.5, |
|
"learning_rate": 4.551524621121391e-06, |
|
"logits/chosen": 0.9528602361679077, |
|
"logits/rejected": 0.8358736038208008, |
|
"logps/chosen": -303.16326904296875, |
|
"logps/rejected": -482.1064453125, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2860808372497559, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.5447678565979, |
|
"rewards/student_margin": 7.830848693847656, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 12.375, |
|
"learning_rate": 4.52131726008025e-06, |
|
"logits/chosen": 0.6702090501785278, |
|
"logits/rejected": 0.7374387383460999, |
|
"logps/chosen": -261.7665100097656, |
|
"logps/rejected": -397.46844482421875, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.5471059083938599, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.151522636413574, |
|
"rewards/student_margin": 5.6986284255981445, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 13.5625, |
|
"learning_rate": 4.49023266426411e-06, |
|
"logits/chosen": 0.9892054796218872, |
|
"logits/rejected": 0.7039884328842163, |
|
"logps/chosen": -295.9824523925781, |
|
"logps/rejected": -426.09747314453125, |
|
"loss": 0.2412, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.7493427395820618, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.928679466247559, |
|
"rewards/student_margin": 5.6780219078063965, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 37.0, |
|
"learning_rate": 4.458284324127415e-06, |
|
"logits/chosen": 0.5855517387390137, |
|
"logits/rejected": 0.8191647529602051, |
|
"logps/chosen": -259.15338134765625, |
|
"logps/rejected": -469.5276794433594, |
|
"loss": 0.1557, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.588661789894104, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.215652942657471, |
|
"rewards/student_margin": 6.804314613342285, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 4.425486104982418e-06, |
|
"logits/chosen": 0.6128954291343689, |
|
"logits/rejected": 0.7978076934814453, |
|
"logps/chosen": -284.3572692871094, |
|
"logps/rejected": -501.80731201171875, |
|
"loss": 0.1894, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.8997933268547058, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.41839075088501, |
|
"rewards/student_margin": 6.318183422088623, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 10.75, |
|
"learning_rate": 4.391852240981749e-06, |
|
"logits/chosen": 0.8700593709945679, |
|
"logits/rejected": 0.7678433060646057, |
|
"logps/chosen": -315.51397705078125, |
|
"logps/rejected": -463.90325927734375, |
|
"loss": 0.1597, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.8418912887573242, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.582736492156982, |
|
"rewards/student_margin": 6.424628257751465, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 52.0, |
|
"learning_rate": 4.357397328940909e-06, |
|
"logits/chosen": 0.7347143888473511, |
|
"logits/rejected": 0.6636631488800049, |
|
"logps/chosen": -261.46063232421875, |
|
"logps/rejected": -398.423828125, |
|
"loss": 0.1931, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.5999925136566162, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -3.0548489093780518, |
|
"rewards/student_margin": 4.654841423034668, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 24.25, |
|
"learning_rate": 4.322136322003382e-06, |
|
"logits/chosen": 1.0548720359802246, |
|
"logits/rejected": 0.7509857416152954, |
|
"logps/chosen": -297.46795654296875, |
|
"logps/rejected": -467.52178955078125, |
|
"loss": 0.2103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6388553380966187, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.260157585144043, |
|
"rewards/student_margin": 5.899013042449951, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 16.25, |
|
"learning_rate": 4.286084523151087e-06, |
|
"logits/chosen": 0.996881365776062, |
|
"logits/rejected": 0.9198592305183411, |
|
"logps/chosen": -283.44305419921875, |
|
"logps/rejected": -486.172607421875, |
|
"loss": 0.1567, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.3912031650543213, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.119289398193359, |
|
"rewards/student_margin": 7.510491847991943, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 14.9375, |
|
"learning_rate": 4.249257578563019e-06, |
|
"logits/chosen": 0.8506627082824707, |
|
"logits/rejected": 0.977446436882019, |
|
"logps/chosen": -247.42568969726562, |
|
"logps/rejected": -471.06378173828125, |
|
"loss": 0.1767, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7721201181411743, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.995635509490967, |
|
"rewards/student_margin": 6.76775598526001, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 4.211671470824933e-06, |
|
"logits/chosen": 0.4623135030269623, |
|
"logits/rejected": 0.9466916918754578, |
|
"logps/chosen": -205.3867645263672, |
|
"logps/rejected": -444.2435607910156, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.03339421749115, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.132498264312744, |
|
"rewards/student_margin": 6.165892601013184, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 17.875, |
|
"learning_rate": 4.17334251199304e-06, |
|
"logits/chosen": 0.6980913877487183, |
|
"logits/rejected": 0.7692145109176636, |
|
"logps/chosen": -217.8673553466797, |
|
"logps/rejected": -433.05523681640625, |
|
"loss": 0.2076, |
|
"rewards/accuracies": 0.9000000953674316, |
|
"rewards/chosen": 1.1552258729934692, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.5133209228515625, |
|
"rewards/student_margin": 7.668546199798584, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 15.125, |
|
"learning_rate": 4.134287336514707e-06, |
|
"logits/chosen": 0.8068975210189819, |
|
"logits/rejected": 0.7799355387687683, |
|
"logps/chosen": -252.831787109375, |
|
"logps/rejected": -452.97235107421875, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.666953682899475, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.828895092010498, |
|
"rewards/student_margin": 6.495848178863525, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 4.094522894009251e-06, |
|
"logits/chosen": 0.5468628406524658, |
|
"logits/rejected": 0.8274737596511841, |
|
"logps/chosen": -225.50283813476562, |
|
"logps/rejected": -464.0188903808594, |
|
"loss": 0.0782, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2933818101882935, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.901278495788574, |
|
"rewards/student_margin": 7.1946611404418945, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 12.625, |
|
"learning_rate": 4.054066441911939e-06, |
|
"logits/chosen": 0.9180652499198914, |
|
"logits/rejected": 0.9175385236740112, |
|
"logps/chosen": -265.2586975097656, |
|
"logps/rejected": -485.8116149902344, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.1107780933380127, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.457947731018066, |
|
"rewards/student_margin": 8.568726539611816, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 37.0, |
|
"learning_rate": 4.012935537984414e-06, |
|
"logits/chosen": 0.5942314863204956, |
|
"logits/rejected": 0.9308391809463501, |
|
"logps/chosen": -275.4700622558594, |
|
"logps/rejected": -422.10968017578125, |
|
"loss": 0.2432, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.1436917781829834, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.684277534484863, |
|
"rewards/student_margin": 6.827969551086426, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.25, |
|
"learning_rate": 3.971148032694764e-06, |
|
"logits/chosen": 0.9563238024711609, |
|
"logits/rejected": 0.7059202790260315, |
|
"logps/chosen": -320.32305908203125, |
|
"logps/rejected": -390.46435546875, |
|
"loss": 0.2072, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.0577527284622192, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.869620323181152, |
|
"rewards/student_margin": 6.92737340927124, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.703125, |
|
"learning_rate": 3.928722061470577e-06, |
|
"logits/chosen": 0.8672806024551392, |
|
"logits/rejected": 0.904620349407196, |
|
"logps/chosen": -272.1111755371094, |
|
"logps/rejected": -498.47064208984375, |
|
"loss": 0.1982, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.163116455078125, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.814660549163818, |
|
"rewards/student_margin": 7.977777004241943, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 3.885676036828314e-06, |
|
"logits/chosen": 0.6644450426101685, |
|
"logits/rejected": 0.8107024431228638, |
|
"logps/chosen": -261.23565673828125, |
|
"logps/rejected": -452.854736328125, |
|
"loss": 0.1308, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.9099276661872864, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.2165422439575195, |
|
"rewards/student_margin": 7.126469612121582, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 3.842028640382427e-06, |
|
"logits/chosen": 0.7108433246612549, |
|
"logits/rejected": 0.8594176173210144, |
|
"logps/chosen": -257.36309814453125, |
|
"logps/rejected": -453.609375, |
|
"loss": 0.1798, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 0.9560137987136841, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.767098426818848, |
|
"rewards/student_margin": 6.723111629486084, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 43.5, |
|
"learning_rate": 3.797798814737701e-06, |
|
"logits/chosen": 0.6871606111526489, |
|
"logits/rejected": 0.7347510457038879, |
|
"logps/chosen": -256.4419250488281, |
|
"logps/rejected": -454.43505859375, |
|
"loss": 0.1962, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.0755133628845215, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.72244119644165, |
|
"rewards/student_margin": 7.797955513000488, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 11.75, |
|
"learning_rate": 3.7530057552683173e-06, |
|
"logits/chosen": 0.8444603085517883, |
|
"logits/rejected": 0.782776951789856, |
|
"logps/chosen": -257.0390319824219, |
|
"logps/rejected": -410.9728088378906, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 2.4874026775360107, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.097342491149902, |
|
"rewards/student_margin": 8.584746360778809, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 15.75, |
|
"learning_rate": 3.7076689017872246e-06, |
|
"logits/chosen": 0.7363184690475464, |
|
"logits/rejected": 0.7506911158561707, |
|
"logps/chosen": -304.94952392578125, |
|
"logps/rejected": -459.2926330566406, |
|
"loss": 0.1036, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.8456310033798218, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.267752170562744, |
|
"rewards/student_margin": 8.113383293151855, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 17.375, |
|
"learning_rate": 3.661807930109422e-06, |
|
"logits/chosen": 0.9374760389328003, |
|
"logits/rejected": 0.9488743543624878, |
|
"logps/chosen": -244.1278839111328, |
|
"logps/rejected": -425.54486083984375, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.9404596090316772, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.88118314743042, |
|
"rewards/student_margin": 7.821642875671387, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 3.6154427435128173e-06, |
|
"logits/chosen": 0.9854844212532043, |
|
"logits/rejected": 0.9534761309623718, |
|
"logps/chosen": -283.98541259765625, |
|
"logps/rejected": -478.35101318359375, |
|
"loss": 0.1448, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.8144950866699219, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.873044013977051, |
|
"rewards/student_margin": 7.687539577484131, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 21.5, |
|
"learning_rate": 3.5685934641003772e-06, |
|
"logits/chosen": 0.7693665623664856, |
|
"logits/rejected": 0.8945738673210144, |
|
"logps/chosen": -282.7848205566406, |
|
"logps/rejected": -486.8995056152344, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.3459951877593994, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.5364813804626465, |
|
"rewards/student_margin": 8.882476806640625, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 3.521280424067296e-06, |
|
"logits/chosen": 0.965567946434021, |
|
"logits/rejected": 0.9754508137702942, |
|
"logps/chosen": -291.39251708984375, |
|
"logps/rejected": -505.8446350097656, |
|
"loss": 0.2274, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.2771803140640259, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.258183479309082, |
|
"rewards/student_margin": 8.53536319732666, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 3.473524156876999e-06, |
|
"logits/chosen": 0.831188976764679, |
|
"logits/rejected": 0.8925005197525024, |
|
"logps/chosen": -298.30377197265625, |
|
"logps/rejected": -461.78680419921875, |
|
"loss": 0.2204, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": 1.6619608402252197, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.813011169433594, |
|
"rewards/student_margin": 7.474971771240234, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 38.5, |
|
"learning_rate": 3.425345388349787e-06, |
|
"logits/chosen": 0.7174257040023804, |
|
"logits/rejected": 0.8275891542434692, |
|
"logps/chosen": -240.74246215820312, |
|
"logps/rejected": -411.5419921875, |
|
"loss": 0.1942, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.9081161618232727, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.642927646636963, |
|
"rewards/student_margin": 6.551044464111328, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 15.9375, |
|
"learning_rate": 3.376765027668003e-06, |
|
"logits/chosen": 0.8432585000991821, |
|
"logits/rejected": 0.8388878107070923, |
|
"logps/chosen": -280.2584533691406, |
|
"logps/rejected": -491.39178466796875, |
|
"loss": 0.2408, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 1.4920928478240967, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.902321815490723, |
|
"rewards/student_margin": 8.394414901733398, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 3.3278041583016286e-06, |
|
"logits/chosen": 0.6360442042350769, |
|
"logits/rejected": 0.8018967509269714, |
|
"logps/chosen": -252.11990356445312, |
|
"logps/rejected": -425.5966796875, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.4703423976898193, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.4766926765441895, |
|
"rewards/student_margin": 6.947035312652588, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 12.9375, |
|
"learning_rate": 3.2784840288582283e-06, |
|
"logits/chosen": 0.6672400236129761, |
|
"logits/rejected": 0.6059892773628235, |
|
"logps/chosen": -277.3988342285156, |
|
"logps/rejected": -451.8529357910156, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.5886940956115723, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.682928562164307, |
|
"rewards/student_margin": 7.271622657775879, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.375, |
|
"learning_rate": 3.2288260438612322e-06, |
|
"logits/chosen": 0.9587205648422241, |
|
"logits/rejected": 1.0139048099517822, |
|
"logps/chosen": -284.29925537109375, |
|
"logps/rejected": -498.6830139160156, |
|
"loss": 0.105, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 2.2029919624328613, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.883734703063965, |
|
"rewards/student_margin": 9.086727142333984, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 10.875, |
|
"learning_rate": 3.1788517544605545e-06, |
|
"logits/chosen": 1.1525061130523682, |
|
"logits/rejected": 0.7991655468940735, |
|
"logps/chosen": -297.87835693359375, |
|
"logps/rejected": -439.14862060546875, |
|
"loss": 0.1276, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.4161432981491089, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.34813928604126, |
|
"rewards/student_margin": 7.764281272888184, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 54.0, |
|
"learning_rate": 3.1285828490795744e-06, |
|
"logits/chosen": 0.8127134442329407, |
|
"logits/rejected": 0.8160552978515625, |
|
"logps/chosen": -264.32086181640625, |
|
"logps/rejected": -431.55584716796875, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 2.1443748474121094, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.04656982421875, |
|
"rewards/student_margin": 7.190944671630859, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 18.875, |
|
"learning_rate": 3.078041144002539e-06, |
|
"logits/chosen": 0.9984350204467773, |
|
"logits/rejected": 0.8483269810676575, |
|
"logps/chosen": -263.3470153808594, |
|
"logps/rejected": -488.2957458496094, |
|
"loss": 0.1416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.730027675628662, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.890661716461182, |
|
"rewards/student_margin": 9.620689392089844, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 24.5, |
|
"learning_rate": 3.027248573906481e-06, |
|
"logits/chosen": 0.797519326210022, |
|
"logits/rejected": 0.7170546650886536, |
|
"logps/chosen": -275.5174865722656, |
|
"logps/rejected": -417.2601013183594, |
|
"loss": 0.1486, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.5745337009429932, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.585874080657959, |
|
"rewards/student_margin": 8.160408020019531, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 45.25, |
|
"learning_rate": 2.9762271823417533e-06, |
|
"logits/chosen": 0.9001606106758118, |
|
"logits/rejected": 0.6589866280555725, |
|
"logps/chosen": -292.3993835449219, |
|
"logps/rejected": -382.26116943359375, |
|
"loss": 0.1167, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.43106094002723694, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.44753360748291, |
|
"rewards/student_margin": 6.878593444824219, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 2.9249991121653105e-06, |
|
"logits/chosen": 0.769407331943512, |
|
"logits/rejected": 0.8254309892654419, |
|
"logps/chosen": -237.5071563720703, |
|
"logps/rejected": -430.1615295410156, |
|
"loss": 0.1906, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.6723854541778564, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.464064598083496, |
|
"rewards/student_margin": 7.136450290679932, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 2.873586595930894e-06, |
|
"logits/chosen": 0.7545122504234314, |
|
"logits/rejected": 0.804780125617981, |
|
"logps/chosen": -273.86224365234375, |
|
"logps/rejected": -455.18865966796875, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 1.6732370853424072, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.744823932647705, |
|
"rewards/student_margin": 7.41806173324585, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 2.8220119462402916e-06, |
|
"logits/chosen": 0.7920311093330383, |
|
"logits/rejected": 0.8012738227844238, |
|
"logps/chosen": -250.64584350585938, |
|
"logps/rejected": -458.3165588378906, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4162245988845825, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -8.085262298583984, |
|
"rewards/student_margin": 9.501487731933594, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 13.3125, |
|
"learning_rate": 2.7702975460598545e-06, |
|
"logits/chosen": 0.969406008720398, |
|
"logits/rejected": 0.608393132686615, |
|
"logps/chosen": -290.60137939453125, |
|
"logps/rejected": -373.6714172363281, |
|
"loss": 0.1, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.6002813577651978, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.063361644744873, |
|
"rewards/student_margin": 7.663643836975098, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 2.718465839006478e-06, |
|
"logits/chosen": 0.8396002650260925, |
|
"logits/rejected": 0.6521191596984863, |
|
"logps/chosen": -245.06759643554688, |
|
"logps/rejected": -434.6278381347656, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 2.286115884780884, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.042721748352051, |
|
"rewards/student_margin": 9.328838348388672, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.21875, |
|
"learning_rate": 2.6665393196072555e-06, |
|
"logits/chosen": 1.1106770038604736, |
|
"logits/rejected": 1.1053907871246338, |
|
"logps/chosen": -294.12445068359375, |
|
"logps/rejected": -506.8854064941406, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.6015878915786743, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.636853218078613, |
|
"rewards/student_margin": 7.23844051361084, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 2.6145405235370463e-06, |
|
"logits/chosen": 0.8759015798568726, |
|
"logits/rejected": 0.9923240542411804, |
|
"logps/chosen": -266.8671875, |
|
"logps/rejected": -513.6619262695312, |
|
"loss": 0.1821, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.28810906410217285, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -8.521490097045898, |
|
"rewards/student_margin": 8.809598922729492, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 10.75, |
|
"learning_rate": 2.562492017838183e-06, |
|
"logits/chosen": 0.6548887491226196, |
|
"logits/rejected": 0.5423046946525574, |
|
"logps/chosen": -234.9774932861328, |
|
"logps/rejected": -374.95782470703125, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.704568862915039, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.6951775550842285, |
|
"rewards/student_margin": 7.399746894836426, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 42.75, |
|
"learning_rate": 2.5104163911265573e-06, |
|
"logits/chosen": 0.8199352025985718, |
|
"logits/rejected": 0.8868463635444641, |
|
"logps/chosen": -305.3675231933594, |
|
"logps/rejected": -431.2710876464844, |
|
"loss": 0.2353, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 1.2983571290969849, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.4470953941345215, |
|
"rewards/student_margin": 7.745451927185059, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.75, |
|
"learning_rate": 2.458336243788354e-06, |
|
"logits/chosen": 0.97747802734375, |
|
"logits/rejected": 0.907408595085144, |
|
"logps/chosen": -280.62652587890625, |
|
"logps/rejected": -470.7151794433594, |
|
"loss": 0.2206, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0915725231170654, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.712690830230713, |
|
"rewards/student_margin": 7.804263114929199, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 41.25, |
|
"learning_rate": 2.4062741781716735e-06, |
|
"logits/chosen": 0.9223145246505737, |
|
"logits/rejected": 1.0177326202392578, |
|
"logps/chosen": -281.8843994140625, |
|
"logps/rejected": -455.46514892578125, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.6827199459075928, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.358351230621338, |
|
"rewards/student_margin": 9.041070938110352, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 35.75, |
|
"learning_rate": 2.3542527887773004e-06, |
|
"logits/chosen": 0.848107635974884, |
|
"logits/rejected": 0.7362527847290039, |
|
"logps/chosen": -258.37823486328125, |
|
"logps/rejected": -435.1189880371094, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 2.6132445335388184, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.929879188537598, |
|
"rewards/student_margin": 9.543124198913574, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 2.3022946524528772e-06, |
|
"logits/chosen": 0.8559589385986328, |
|
"logits/rejected": 0.9640874862670898, |
|
"logps/chosen": -273.4801330566406, |
|
"logps/rejected": -515.2051391601562, |
|
"loss": 0.1064, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.710845708847046, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.155877590179443, |
|
"rewards/student_margin": 8.86672306060791, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 15.5625, |
|
"learning_rate": 2.250422318594745e-06, |
|
"logits/chosen": 0.9786388278007507, |
|
"logits/rejected": 0.9852145314216614, |
|
"logps/chosen": -283.04254150390625, |
|
"logps/rejected": -442.75250244140625, |
|
"loss": 0.1178, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.848937749862671, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.144076347351074, |
|
"rewards/student_margin": 8.993013381958008, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 14.5, |
|
"learning_rate": 2.1986582993616926e-06, |
|
"logits/chosen": 0.9063059687614441, |
|
"logits/rejected": 0.6476073861122131, |
|
"logps/chosen": -303.6152648925781, |
|
"logps/rejected": -376.8994140625, |
|
"loss": 0.1479, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3247263431549072, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.939084529876709, |
|
"rewards/student_margin": 7.263810634613037, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 36.0, |
|
"learning_rate": 2.1470250599048674e-06, |
|
"logits/chosen": 0.5596086382865906, |
|
"logits/rejected": 0.7942403554916382, |
|
"logps/chosen": -249.0633544921875, |
|
"logps/rejected": -450.846435546875, |
|
"loss": 0.099, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.618423581123352, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.0066022872924805, |
|
"rewards/student_margin": 6.625025272369385, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 2.0955450086180883e-06, |
|
"logits/chosen": 0.8724521398544312, |
|
"logits/rejected": 1.031476378440857, |
|
"logps/chosen": -250.84811401367188, |
|
"logps/rejected": -475.84613037109375, |
|
"loss": 0.0936, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 1.580183982849121, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.966664791107178, |
|
"rewards/student_margin": 8.546849250793457, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 2.044240487412792e-06, |
|
"logits/chosen": 0.899154007434845, |
|
"logits/rejected": 1.014182209968567, |
|
"logps/chosen": -274.6115417480469, |
|
"logps/rejected": -471.95977783203125, |
|
"loss": 0.135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1779943704605103, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.873895168304443, |
|
"rewards/student_margin": 9.051889419555664, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 1.993133762021825e-06, |
|
"logits/chosen": 0.8422727584838867, |
|
"logits/rejected": 0.7054781317710876, |
|
"logps/chosen": -283.10638427734375, |
|
"logps/rejected": -371.0531311035156, |
|
"loss": 0.1059, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 1.5415973663330078, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.427048683166504, |
|
"rewards/student_margin": 8.968645095825195, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 6.5, |
|
"learning_rate": 1.9422470123363103e-06, |
|
"logits/chosen": 0.7101219892501831, |
|
"logits/rejected": 0.9613116383552551, |
|
"logps/chosen": -219.544677734375, |
|
"logps/rejected": -446.5857849121094, |
|
"loss": 0.1332, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.7443050146102905, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.597276210784912, |
|
"rewards/student_margin": 8.341582298278809, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 12.375, |
|
"learning_rate": 1.891602322779752e-06, |
|
"logits/chosen": 0.7524425387382507, |
|
"logits/rejected": 0.8981329798698425, |
|
"logps/chosen": -268.0097351074219, |
|
"logps/rejected": -413.47381591796875, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.264373540878296, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.079689025878906, |
|
"rewards/student_margin": 7.344063758850098, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 1.8412216727235877e-06, |
|
"logits/chosen": 0.7886325120925903, |
|
"logits/rejected": 0.744663417339325, |
|
"logps/chosen": -249.9864501953125, |
|
"logps/rejected": -425.23724365234375, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1795929670333862, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.4629058837890625, |
|
"rewards/student_margin": 8.642499923706055, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 9.75, |
|
"learning_rate": 1.7911269269483166e-06, |
|
"logits/chosen": 0.7772693037986755, |
|
"logits/rejected": 0.6808141469955444, |
|
"logps/chosen": -277.87237548828125, |
|
"logps/rejected": -447.9607849121094, |
|
"loss": 0.0864, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 1.6467063426971436, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.1719841957092285, |
|
"rewards/student_margin": 8.818690299987793, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 20.75, |
|
"learning_rate": 1.741339826154378e-06, |
|
"logits/chosen": 0.8876211047172546, |
|
"logits/rejected": 0.7327874898910522, |
|
"logps/chosen": -296.62548828125, |
|
"logps/rejected": -385.49908447265625, |
|
"loss": 0.176, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.4299262762069702, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.448190212249756, |
|
"rewards/student_margin": 7.878116607666016, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 25.875, |
|
"learning_rate": 1.6918819775268592e-06, |
|
"logits/chosen": 0.7601544857025146, |
|
"logits/rejected": 0.8448535799980164, |
|
"logps/chosen": -244.98440551757812, |
|
"logps/rejected": -464.99859619140625, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": 1.681293249130249, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.71320104598999, |
|
"rewards/student_margin": 7.394494533538818, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 1.6427748453581648e-06, |
|
"logits/chosen": 0.7851732969284058, |
|
"logits/rejected": 0.9021281003952026, |
|
"logps/chosen": -258.1846923828125, |
|
"logps/rejected": -468.34454345703125, |
|
"loss": 0.188, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.4975563287734985, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.958889007568359, |
|
"rewards/student_margin": 9.456445693969727, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 31.125, |
|
"learning_rate": 1.5940397417326936e-06, |
|
"logits/chosen": 1.0174816846847534, |
|
"logits/rejected": 0.9201455116271973, |
|
"logps/chosen": -353.3731689453125, |
|
"logps/rejected": -487.0121154785156, |
|
"loss": 0.1375, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.9096149206161499, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.4507575035095215, |
|
"rewards/student_margin": 8.360371589660645, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 5.25, |
|
"learning_rate": 1.5456978172775643e-06, |
|
"logits/chosen": 0.8032829165458679, |
|
"logits/rejected": 0.8213585615158081, |
|
"logps/chosen": -266.06964111328125, |
|
"logps/rejected": -452.69915771484375, |
|
"loss": 0.0926, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.64862060546875, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -8.033635139465332, |
|
"rewards/student_margin": 8.682256698608398, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 1.4977700519834321e-06, |
|
"logits/chosen": 0.8034214973449707, |
|
"logits/rejected": 1.0235230922698975, |
|
"logps/chosen": -290.88177490234375, |
|
"logps/rejected": -500.931640625, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.317093849182129, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.850368499755859, |
|
"rewards/student_margin": 8.167463302612305, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 45.0, |
|
"learning_rate": 1.4502772460993387e-06, |
|
"logits/chosen": 1.1025218963623047, |
|
"logits/rejected": 1.010790467262268, |
|
"logps/chosen": -307.6918640136719, |
|
"logps/rejected": -470.69207763671875, |
|
"loss": 0.1267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2598185539245605, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.64090633392334, |
|
"rewards/student_margin": 9.900724411010742, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 13.125, |
|
"learning_rate": 1.4032400111055837e-06, |
|
"logits/chosen": 0.9105297327041626, |
|
"logits/rejected": 0.8886402249336243, |
|
"logps/chosen": -319.9889831542969, |
|
"logps/rejected": -477.66619873046875, |
|
"loss": 0.0448, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7530571222305298, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.4112043380737305, |
|
"rewards/student_margin": 9.164262771606445, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 1.3566787607685114e-06, |
|
"logits/chosen": 0.7923764586448669, |
|
"logits/rejected": 0.9827996492385864, |
|
"logps/chosen": -266.4019470214844, |
|
"logps/rejected": -494.4241638183594, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.589381992816925, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.174019813537598, |
|
"rewards/student_margin": 7.763401985168457, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 14.4375, |
|
"learning_rate": 1.3106137022811084e-06, |
|
"logits/chosen": 0.8517892956733704, |
|
"logits/rejected": 0.7801742553710938, |
|
"logps/chosen": -283.55535888671875, |
|
"logps/rejected": -404.27117919921875, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.2257543802261353, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.162593841552734, |
|
"rewards/student_margin": 7.388348579406738, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.7265625, |
|
"learning_rate": 1.2650648274932524e-06, |
|
"logits/chosen": 0.8167713284492493, |
|
"logits/rejected": 0.9700421094894409, |
|
"logps/chosen": -285.7298889160156, |
|
"logps/rejected": -491.59454345703125, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 1.5120792388916016, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.814170837402344, |
|
"rewards/student_margin": 8.326250076293945, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 1.2200519042354156e-06, |
|
"logits/chosen": 0.7993478775024414, |
|
"logits/rejected": 0.9609449505805969, |
|
"logps/chosen": -280.5130615234375, |
|
"logps/rejected": -520.2760009765625, |
|
"loss": 0.1403, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7520939111709595, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.6659979820251465, |
|
"rewards/student_margin": 8.418092727661133, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 21.125, |
|
"learning_rate": 1.1755944677395906e-06, |
|
"logits/chosen": 0.7904524803161621, |
|
"logits/rejected": 0.6315158605575562, |
|
"logps/chosen": -269.69537353515625, |
|
"logps/rejected": -438.4429626464844, |
|
"loss": 0.1417, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.4417873620986938, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.934798717498779, |
|
"rewards/student_margin": 7.376585483551025, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 17.0, |
|
"learning_rate": 1.131711812161163e-06, |
|
"logits/chosen": 0.9502233266830444, |
|
"logits/rejected": 0.7244770526885986, |
|
"logps/chosen": -259.23577880859375, |
|
"logps/rejected": -458.7930603027344, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.806561827659607, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.84487247467041, |
|
"rewards/student_margin": 8.651433944702148, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 1.0884229822054112e-06, |
|
"logits/chosen": 0.8743753433227539, |
|
"logits/rejected": 0.9601569175720215, |
|
"logps/chosen": -272.127197265625, |
|
"logps/rejected": -458.1211853027344, |
|
"loss": 0.0911, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.8998870849609375, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.856209754943848, |
|
"rewards/student_margin": 7.756096839904785, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 1.045746764862264e-06, |
|
"logits/chosen": 0.9644481539726257, |
|
"logits/rejected": 0.7293006181716919, |
|
"logps/chosen": -280.24945068359375, |
|
"logps/rejected": -439.32110595703125, |
|
"loss": 0.097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4347035884857178, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.476078987121582, |
|
"rewards/student_margin": 8.910783767700195, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 25.625, |
|
"learning_rate": 1.003701681252901e-06, |
|
"logits/chosen": 0.90362548828125, |
|
"logits/rejected": 1.1235862970352173, |
|
"logps/chosen": -323.3556213378906, |
|
"logps/rejected": -532.652099609375, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6603376865386963, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.796300411224365, |
|
"rewards/student_margin": 9.45663833618164, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 19.625, |
|
"learning_rate": 9.623059785917452e-07, |
|
"logits/chosen": 0.7187969088554382, |
|
"logits/rejected": 0.9621503949165344, |
|
"logps/chosen": -261.4852294921875, |
|
"logps/rejected": -447.50360107421875, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.18874484300613403, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.745814800262451, |
|
"rewards/student_margin": 5.9345598220825195, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 7.375, |
|
"learning_rate": 9.215776222673217e-07, |
|
"logits/chosen": 0.3421107232570648, |
|
"logits/rejected": 0.8057771921157837, |
|
"logps/chosen": -217.62753295898438, |
|
"logps/rejected": -422.5655822753906, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.3956643342971802, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.416560173034668, |
|
"rewards/student_margin": 6.812225341796875, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 8.815342880454312e-07, |
|
"logits/chosen": 0.9692196846008301, |
|
"logits/rejected": 0.8103785514831543, |
|
"logps/chosen": -307.67828369140625, |
|
"logps/rejected": -442.64373779296875, |
|
"loss": 0.1056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5794651508331299, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.477078437805176, |
|
"rewards/student_margin": 8.056543350219727, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 8.421933543980126e-07, |
|
"logits/chosen": 0.7807630896568298, |
|
"logits/rejected": 0.7269675731658936, |
|
"logps/chosen": -286.12835693359375, |
|
"logps/rejected": -414.36468505859375, |
|
"loss": 0.1915, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.4347432851791382, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.075833797454834, |
|
"rewards/student_margin": 7.510578155517578, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 28.25, |
|
"learning_rate": 8.035718949610344e-07, |
|
"logits/chosen": 0.898847222328186, |
|
"logits/rejected": 0.9448798298835754, |
|
"logps/chosen": -254.6656494140625, |
|
"logps/rejected": -446.5108337402344, |
|
"loss": 0.084, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.2065142393112183, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.854421138763428, |
|
"rewards/student_margin": 9.060935020446777, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 7.656866711246813e-07, |
|
"logits/chosen": 0.808681309223175, |
|
"logits/rejected": 0.8214341998100281, |
|
"logps/chosen": -280.2502746582031, |
|
"logps/rejected": -465.1089782714844, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8903652429580688, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.440779209136963, |
|
"rewards/student_margin": 8.331144332885742, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 18.0, |
|
"learning_rate": 7.285541247590445e-07, |
|
"logits/chosen": 0.7580270171165466, |
|
"logits/rejected": 0.7444619536399841, |
|
"logps/chosen": -226.6719207763672, |
|
"logps/rejected": -390.0633850097656, |
|
"loss": 0.111, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.3921324014663696, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.383325099945068, |
|
"rewards/student_margin": 6.775457859039307, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 6.921903710784955e-07, |
|
"logits/chosen": 0.9310742616653442, |
|
"logits/rejected": 0.9668920636177063, |
|
"logps/chosen": -278.2679443359375, |
|
"logps/rejected": -485.0304260253906, |
|
"loss": 0.1138, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.3040916919708252, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.765616416931152, |
|
"rewards/student_margin": 8.069707870483398, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 6.566111916478133e-07, |
|
"logits/chosen": 0.7073289752006531, |
|
"logits/rejected": 0.5882384777069092, |
|
"logps/chosen": -241.0980987548828, |
|
"logps/rejected": -371.38739013671875, |
|
"loss": 0.0837, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.356290578842163, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.118157386779785, |
|
"rewards/student_margin": 7.474448204040527, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 20.875, |
|
"learning_rate": 6.218320275331167e-07, |
|
"logits/chosen": 0.7838321924209595, |
|
"logits/rejected": 0.8625351190567017, |
|
"logps/chosen": -275.72430419921875, |
|
"logps/rejected": -460.8087463378906, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.2428035736083984, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.993666648864746, |
|
"rewards/student_margin": 8.236470222473145, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 5.878679726005748e-07, |
|
"logits/chosen": 0.8522597551345825, |
|
"logits/rejected": 0.8629144430160522, |
|
"logps/chosen": -240.93460083007812, |
|
"logps/rejected": -462.85400390625, |
|
"loss": 0.1504, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.298228144645691, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.803357124328613, |
|
"rewards/student_margin": 8.101583480834961, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 5.547337669657912e-07, |
|
"logits/chosen": 0.9579585194587708, |
|
"logits/rejected": 0.6823960542678833, |
|
"logps/chosen": -303.3630065917969, |
|
"logps/rejected": -388.1356506347656, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3890726566314697, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.125588893890381, |
|
"rewards/student_margin": 8.51466178894043, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 42.0, |
|
"learning_rate": 5.224437905967208e-07, |
|
"logits/chosen": 0.8415319323539734, |
|
"logits/rejected": 0.9250988960266113, |
|
"logps/chosen": -291.22259521484375, |
|
"logps/rejected": -445.90252685546875, |
|
"loss": 0.1051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1656577587127686, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.761130332946777, |
|
"rewards/student_margin": 7.926787376403809, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 4.910120570728805e-07, |
|
"logits/chosen": 0.8215805292129517, |
|
"logits/rejected": 0.6622090935707092, |
|
"logps/chosen": -267.0766906738281, |
|
"logps/rejected": -377.58685302734375, |
|
"loss": 0.0664, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.4053845405578613, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.223332405090332, |
|
"rewards/student_margin": 7.628718376159668, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 4.604522075035761e-07, |
|
"logits/chosen": 0.7112009525299072, |
|
"logits/rejected": 0.7711256742477417, |
|
"logps/chosen": -263.86737060546875, |
|
"logps/rejected": -462.98712158203125, |
|
"loss": 0.0796, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.1319215297698975, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.624153137207031, |
|
"rewards/student_margin": 7.756073951721191, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 45.75, |
|
"learning_rate": 4.307775046077739e-07, |
|
"logits/chosen": 0.7834721803665161, |
|
"logits/rejected": 0.9438959956169128, |
|
"logps/chosen": -295.6424560546875, |
|
"logps/rejected": -451.99444580078125, |
|
"loss": 0.2402, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7028887271881104, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.262876987457275, |
|
"rewards/student_margin": 8.965765953063965, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 17.125, |
|
"learning_rate": 4.020008269581896e-07, |
|
"logits/chosen": 0.980848491191864, |
|
"logits/rejected": 0.9754363894462585, |
|
"logps/chosen": -240.9884490966797, |
|
"logps/rejected": -413.36669921875, |
|
"loss": 0.1067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8263895511627197, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.974771976470947, |
|
"rewards/student_margin": 7.8011603355407715, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 6.875, |
|
"learning_rate": 3.741346633921e-07, |
|
"logits/chosen": 0.9605533480644226, |
|
"logits/rejected": 0.9040767550468445, |
|
"logps/chosen": -298.63629150390625, |
|
"logps/rejected": -489.89361572265625, |
|
"loss": 0.1588, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 1.2241252660751343, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.067021369934082, |
|
"rewards/student_margin": 8.291147232055664, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 3.471911075912868e-07, |
|
"logits/chosen": 0.9408961534500122, |
|
"logits/rejected": 1.1718331575393677, |
|
"logps/chosen": -266.40240478515625, |
|
"logps/rejected": -504.09197998046875, |
|
"loss": 0.2707, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.8592709302902222, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.315155029296875, |
|
"rewards/student_margin": 7.1744256019592285, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 3.211818528334851e-07, |
|
"logits/chosen": 0.7999654412269592, |
|
"logits/rejected": 1.0235862731933594, |
|
"logps/chosen": -257.5556640625, |
|
"logps/rejected": -462.897216796875, |
|
"loss": 0.1, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.9351085424423218, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.001922607421875, |
|
"rewards/student_margin": 8.937030792236328, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 11.875, |
|
"learning_rate": 2.961181869175944e-07, |
|
"logits/chosen": 1.0124728679656982, |
|
"logits/rejected": 0.8844314813613892, |
|
"logps/chosen": -300.16741943359375, |
|
"logps/rejected": -448.43450927734375, |
|
"loss": 0.1573, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.11926007270813, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.847194671630859, |
|
"rewards/student_margin": 8.966455459594727, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 12.5, |
|
"learning_rate": 2.720109872648716e-07, |
|
"logits/chosen": 0.4867635667324066, |
|
"logits/rejected": 0.8502141833305359, |
|
"logps/chosen": -209.577392578125, |
|
"logps/rejected": -455.8558044433594, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.4790070652961731, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.3167829513549805, |
|
"rewards/student_margin": 6.795790195465088, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 52.75, |
|
"learning_rate": 2.488707161982182e-07, |
|
"logits/chosen": 0.843124270439148, |
|
"logits/rejected": 1.0883945226669312, |
|
"logps/chosen": -262.5457458496094, |
|
"logps/rejected": -444.22052001953125, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.649353265762329, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.9945173263549805, |
|
"rewards/student_margin": 7.6438703536987305, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 10.75, |
|
"learning_rate": 2.267074164016228e-07, |
|
"logits/chosen": 0.9965966939926147, |
|
"logits/rejected": 0.8744094967842102, |
|
"logps/chosen": -283.0147399902344, |
|
"logps/rejected": -421.9447326660156, |
|
"loss": 0.1375, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.2349519729614258, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.453166961669922, |
|
"rewards/student_margin": 8.688119888305664, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 23.125, |
|
"learning_rate": 2.0553070656171875e-07, |
|
"logits/chosen": 0.9256088137626648, |
|
"logits/rejected": 0.821808934211731, |
|
"logps/chosen": -297.85980224609375, |
|
"logps/rejected": -467.8524475097656, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.7768294811248779, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.595095634460449, |
|
"rewards/student_margin": 8.371925354003906, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 13.0, |
|
"learning_rate": 1.8534977719335352e-07, |
|
"logits/chosen": 0.9714568853378296, |
|
"logits/rejected": 1.072925329208374, |
|
"logps/chosen": -289.45220947265625, |
|
"logps/rejected": -546.474609375, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.8361371755599976, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.2439751625061035, |
|
"rewards/student_margin": 9.080111503601074, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 39.0, |
|
"learning_rate": 1.6617338665098433e-07, |
|
"logits/chosen": 0.7774202227592468, |
|
"logits/rejected": 0.9740447998046875, |
|
"logps/chosen": -269.41668701171875, |
|
"logps/rejected": -509.4042053222656, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5022609233856201, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.170316219329834, |
|
"rewards/student_margin": 8.672576904296875, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.480098573276245e-07, |
|
"logits/chosen": 1.056938886642456, |
|
"logits/rejected": 0.8980535268783569, |
|
"logps/chosen": -329.66973876953125, |
|
"logps/rejected": -467.0355529785156, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 1.363464593887329, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.241970062255859, |
|
"rewards/student_margin": 8.605435371398926, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 20.375, |
|
"learning_rate": 1.3086707204299415e-07, |
|
"logits/chosen": 0.7553235292434692, |
|
"logits/rejected": 0.7916673421859741, |
|
"logps/chosen": -288.36724853515625, |
|
"logps/rejected": -435.315673828125, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.9000000953674316, |
|
"rewards/chosen": 1.3282121419906616, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.836938381195068, |
|
"rewards/student_margin": 7.1651506423950195, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 1.1475247062244344e-07, |
|
"logits/chosen": 0.7563384175300598, |
|
"logits/rejected": 0.7898184061050415, |
|
"logps/chosen": -237.79080200195312, |
|
"logps/rejected": -407.64886474609375, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.2482335567474365, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.782485485076904, |
|
"rewards/student_margin": 7.030718803405762, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 29.125, |
|
"learning_rate": 9.967304666813005e-08, |
|
"logits/chosen": 0.7608251571655273, |
|
"logits/rejected": 0.7628828287124634, |
|
"logps/chosen": -271.0698547363281, |
|
"logps/rejected": -438.0777282714844, |
|
"loss": 0.1332, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.6291452050209045, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.842491149902344, |
|
"rewards/student_margin": 7.4716362953186035, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 16.25, |
|
"learning_rate": 8.563534452385474e-08, |
|
"logits/chosen": 0.7894353270530701, |
|
"logits/rejected": 0.8196222186088562, |
|
"logps/chosen": -230.44387817382812, |
|
"logps/rejected": -472.4774475097656, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.7991675734519958, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.350424766540527, |
|
"rewards/student_margin": 8.149592399597168, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 6.75, |
|
"learning_rate": 7.264545643486997e-08, |
|
"logits/chosen": 0.7317989468574524, |
|
"logits/rejected": 1.0786840915679932, |
|
"logps/chosen": -269.79986572265625, |
|
"logps/rejected": -494.38079833984375, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.9669907689094543, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.734914302825928, |
|
"rewards/student_margin": 7.701905250549316, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 6.070901990389732e-08, |
|
"logits/chosen": 0.916016697883606, |
|
"logits/rejected": 0.8686779737472534, |
|
"logps/chosen": -303.04754638671875, |
|
"logps/rejected": -469.576416015625, |
|
"loss": 0.1578, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 2.311420202255249, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.765598297119141, |
|
"rewards/student_margin": 9.077019691467285, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 4.983121524449852e-08, |
|
"logits/chosen": 0.6329897046089172, |
|
"logits/rejected": 0.7745058536529541, |
|
"logps/chosen": -263.32879638671875, |
|
"logps/rejected": -447.2388610839844, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 1.3544021844863892, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.791384220123291, |
|
"rewards/student_margin": 8.14578628540039, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 4.875, |
|
"learning_rate": 4.001676333286214e-08, |
|
"logits/chosen": 0.9110337495803833, |
|
"logits/rejected": 0.8442234992980957, |
|
"logps/chosen": -295.57733154296875, |
|
"logps/rejected": -447.7826232910156, |
|
"loss": 0.2055, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": 1.0487920045852661, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.916157245635986, |
|
"rewards/student_margin": 6.9649481773376465, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 19.0, |
|
"learning_rate": 3.126992355898306e-08, |
|
"logits/chosen": 0.9393006563186646, |
|
"logits/rejected": 0.6766337156295776, |
|
"logps/chosen": -309.7806091308594, |
|
"logps/rejected": -452.90771484375, |
|
"loss": 0.0915, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.41088810563087463, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.9278244972229, |
|
"rewards/student_margin": 7.3387131690979, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 3.5625, |
|
"learning_rate": 2.3594491978123357e-08, |
|
"logits/chosen": 0.672354519367218, |
|
"logits/rejected": 0.8944166302680969, |
|
"logps/chosen": -264.7320861816406, |
|
"logps/rejected": -475.0292053222656, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.0495116710662842, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.066926956176758, |
|
"rewards/student_margin": 7.116438388824463, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 20.625, |
|
"learning_rate": 1.6993799663355403e-08, |
|
"logits/chosen": 0.8207957148551941, |
|
"logits/rejected": 0.7881325483322144, |
|
"logps/chosen": -233.29336547851562, |
|
"logps/rejected": -383.0066833496094, |
|
"loss": 0.1113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8222615718841553, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.887686252593994, |
|
"rewards/student_margin": 7.709948539733887, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 1.14707112599044e-08, |
|
"logits/chosen": 0.8697876930236816, |
|
"logits/rejected": 0.8349195718765259, |
|
"logps/chosen": -279.90997314453125, |
|
"logps/rejected": -466.74261474609375, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.8987419605255127, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.841641426086426, |
|
"rewards/student_margin": 8.74038314819336, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 7.027623741916178e-09, |
|
"logits/chosen": 0.7564228177070618, |
|
"logits/rejected": 0.8881512880325317, |
|
"logps/chosen": -270.4629821777344, |
|
"logps/rejected": -475.2974548339844, |
|
"loss": 0.1823, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.9709944725036621, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -7.043332576751709, |
|
"rewards/student_margin": 8.014327049255371, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 3.666465372190453e-09, |
|
"logits/chosen": 0.9423489570617676, |
|
"logits/rejected": 0.9868823289871216, |
|
"logps/chosen": -273.9312744140625, |
|
"logps/rejected": -433.16259765625, |
|
"loss": 0.0941, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.6478030681610107, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -4.907739162445068, |
|
"rewards/student_margin": 6.5555419921875, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 1.3886948653307752e-09, |
|
"logits/chosen": 0.5223199129104614, |
|
"logits/rejected": 0.6211382150650024, |
|
"logps/chosen": -258.374267578125, |
|
"logps/rejected": -434.00909423828125, |
|
"loss": 0.0864, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 2.2387895584106445, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -5.835597515106201, |
|
"rewards/student_margin": 8.074387550354004, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.9530075467538712e-10, |
|
"logits/chosen": 0.7468317151069641, |
|
"logits/rejected": 0.8975644111633301, |
|
"logps/chosen": -255.3610382080078, |
|
"logps/rejected": -451.608642578125, |
|
"loss": 0.1558, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 1.5571821928024292, |
|
"rewards/diff": 0.0, |
|
"rewards/diff_abs": 0.0, |
|
"rewards/rejected": -6.666487693786621, |
|
"rewards/student_margin": 8.223670959472656, |
|
"rewards/teacher_margin": 0.0, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1676, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1673302539670951, |
|
"train_runtime": 1977.0883, |
|
"train_samples_per_second": 20.34, |
|
"train_steps_per_second": 0.848 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1676, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 400, |
|
"total_flos": 0.0, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|