|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 4164, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007204610951008645, |
|
"grad_norm": 16.86738074547546, |
|
"learning_rate": 1.199040767386091e-10, |
|
"logits/chosen": -1.901450514793396, |
|
"logits/rejected": -1.9076323509216309, |
|
"logps/chosen": -0.8524526953697205, |
|
"logps/rejected": -0.9626365900039673, |
|
"loss": 1.6316, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.704905390739441, |
|
"rewards/margins": 0.22036786377429962, |
|
"rewards/rejected": -1.9252731800079346, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007204610951008645, |
|
"grad_norm": 20.665932859043128, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -2.0206007957458496, |
|
"logits/rejected": -2.0063118934631348, |
|
"logps/chosen": -1.0047835111618042, |
|
"logps/rejected": -1.1094833612442017, |
|
"loss": 1.6543, |
|
"rewards/accuracies": 0.5208333134651184, |
|
"rewards/chosen": -2.0095670223236084, |
|
"rewards/margins": 0.20940010249614716, |
|
"rewards/rejected": -2.2189667224884033, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01440922190201729, |
|
"grad_norm": 26.20867949744724, |
|
"learning_rate": 2.398081534772182e-09, |
|
"logits/chosen": -2.0263831615448, |
|
"logits/rejected": -2.023040294647217, |
|
"logps/chosen": -1.052225112915039, |
|
"logps/rejected": -1.183236002922058, |
|
"loss": 1.6174, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.104450225830078, |
|
"rewards/margins": 0.262021541595459, |
|
"rewards/rejected": -2.366472005844116, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021613832853025938, |
|
"grad_norm": 20.4376768783133, |
|
"learning_rate": 3.597122302158273e-09, |
|
"logits/chosen": -1.9816261529922485, |
|
"logits/rejected": -1.9744749069213867, |
|
"logps/chosen": -1.054040789604187, |
|
"logps/rejected": -1.1520485877990723, |
|
"loss": 1.6706, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.108081579208374, |
|
"rewards/margins": 0.19601555168628693, |
|
"rewards/rejected": -2.3040971755981445, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02881844380403458, |
|
"grad_norm": 22.57924661663736, |
|
"learning_rate": 4.796163069544364e-09, |
|
"logits/chosen": -2.027863025665283, |
|
"logits/rejected": -2.0277962684631348, |
|
"logps/chosen": -1.0358012914657593, |
|
"logps/rejected": -1.1370604038238525, |
|
"loss": 1.6751, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0716025829315186, |
|
"rewards/margins": 0.20251810550689697, |
|
"rewards/rejected": -2.274120807647705, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03602305475504323, |
|
"grad_norm": 17.157686014437257, |
|
"learning_rate": 5.995203836930456e-09, |
|
"logits/chosen": -1.9645040035247803, |
|
"logits/rejected": -1.9652373790740967, |
|
"logps/chosen": -0.9416173100471497, |
|
"logps/rejected": -1.0079900026321411, |
|
"loss": 1.7028, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8832346200942993, |
|
"rewards/margins": 0.13274545967578888, |
|
"rewards/rejected": -2.0159800052642822, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.043227665706051875, |
|
"grad_norm": 24.358115422988377, |
|
"learning_rate": 7.194244604316546e-09, |
|
"logits/chosen": -2.038653612136841, |
|
"logits/rejected": -2.0341880321502686, |
|
"logps/chosen": -1.0892280340194702, |
|
"logps/rejected": -1.14542555809021, |
|
"loss": 1.7183, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1784560680389404, |
|
"rewards/margins": 0.11239476501941681, |
|
"rewards/rejected": -2.29085111618042, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05043227665706052, |
|
"grad_norm": 23.16153472591092, |
|
"learning_rate": 8.393285371702639e-09, |
|
"logits/chosen": -2.0306906700134277, |
|
"logits/rejected": -2.018244743347168, |
|
"logps/chosen": -1.110377311706543, |
|
"logps/rejected": -1.204403281211853, |
|
"loss": 1.6679, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.220754623413086, |
|
"rewards/margins": 0.18805181980133057, |
|
"rewards/rejected": -2.408806562423706, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 28.356652908785378, |
|
"learning_rate": 9.592326139088728e-09, |
|
"logits/chosen": -2.046745777130127, |
|
"logits/rejected": -2.043815851211548, |
|
"logps/chosen": -1.1663789749145508, |
|
"logps/rejected": -1.2379769086837769, |
|
"loss": 1.7002, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.3327579498291016, |
|
"rewards/margins": 0.14319580793380737, |
|
"rewards/rejected": -2.4759538173675537, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06484149855907781, |
|
"grad_norm": 18.121889263061643, |
|
"learning_rate": 1.0791366906474819e-08, |
|
"logits/chosen": -2.004173994064331, |
|
"logits/rejected": -2.0055997371673584, |
|
"logps/chosen": -1.0416834354400635, |
|
"logps/rejected": -1.1493545770645142, |
|
"loss": 1.6516, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.083366870880127, |
|
"rewards/margins": 0.21534208953380585, |
|
"rewards/rejected": -2.2987091541290283, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07204610951008646, |
|
"grad_norm": 21.617985488027507, |
|
"learning_rate": 1.1990407673860912e-08, |
|
"logits/chosen": -2.0398590564727783, |
|
"logits/rejected": -2.033644199371338, |
|
"logps/chosen": -1.0076771974563599, |
|
"logps/rejected": -1.1141164302825928, |
|
"loss": 1.6554, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0153543949127197, |
|
"rewards/margins": 0.21287837624549866, |
|
"rewards/rejected": -2.2282328605651855, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0792507204610951, |
|
"grad_norm": 18.314365281520494, |
|
"learning_rate": 1.3189448441247003e-08, |
|
"logits/chosen": -1.9825471639633179, |
|
"logits/rejected": -1.9712765216827393, |
|
"logps/chosen": -1.0293405055999756, |
|
"logps/rejected": -1.1285316944122314, |
|
"loss": 1.6658, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.058681011199951, |
|
"rewards/margins": 0.19838199019432068, |
|
"rewards/rejected": -2.257063388824463, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08645533141210375, |
|
"grad_norm": 20.758021434418, |
|
"learning_rate": 1.4388489208633092e-08, |
|
"logits/chosen": -1.973820447921753, |
|
"logits/rejected": -1.9720237255096436, |
|
"logps/chosen": -0.9644045829772949, |
|
"logps/rejected": -1.065753698348999, |
|
"loss": 1.6486, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9288091659545898, |
|
"rewards/margins": 0.20269834995269775, |
|
"rewards/rejected": -2.131507396697998, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0936599423631124, |
|
"grad_norm": 20.303280339058496, |
|
"learning_rate": 1.5587529976019183e-08, |
|
"logits/chosen": -2.065807580947876, |
|
"logits/rejected": -2.065150260925293, |
|
"logps/chosen": -1.0804673433303833, |
|
"logps/rejected": -1.152398705482483, |
|
"loss": 1.7011, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1609346866607666, |
|
"rewards/margins": 0.1438627988100052, |
|
"rewards/rejected": -2.304797410964966, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10086455331412104, |
|
"grad_norm": 24.00105137691047, |
|
"learning_rate": 1.6786570743405277e-08, |
|
"logits/chosen": -1.9791269302368164, |
|
"logits/rejected": -1.9726932048797607, |
|
"logps/chosen": -0.9777523279190063, |
|
"logps/rejected": -1.122584342956543, |
|
"loss": 1.5985, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9555046558380127, |
|
"rewards/margins": 0.28966379165649414, |
|
"rewards/rejected": -2.245168685913086, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10806916426512968, |
|
"grad_norm": 22.96932769588003, |
|
"learning_rate": 1.7985611510791365e-08, |
|
"logits/chosen": -1.9961645603179932, |
|
"logits/rejected": -1.991847038269043, |
|
"logps/chosen": -1.0192320346832275, |
|
"logps/rejected": -1.1370147466659546, |
|
"loss": 1.6399, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.038464069366455, |
|
"rewards/margins": 0.2355656921863556, |
|
"rewards/rejected": -2.274029493331909, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 20.410382239314544, |
|
"learning_rate": 1.9184652278177456e-08, |
|
"logits/chosen": -2.0057005882263184, |
|
"logits/rejected": -1.9997113943099976, |
|
"logps/chosen": -0.9478748440742493, |
|
"logps/rejected": -1.0972506999969482, |
|
"loss": 1.5784, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.8957496881484985, |
|
"rewards/margins": 0.29875144362449646, |
|
"rewards/rejected": -2.1945013999938965, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12247838616714697, |
|
"grad_norm": 25.132313050475904, |
|
"learning_rate": 2.038369304556355e-08, |
|
"logits/chosen": -2.0098798274993896, |
|
"logits/rejected": -2.0025277137756348, |
|
"logps/chosen": -1.036834478378296, |
|
"logps/rejected": -1.160753846168518, |
|
"loss": 1.6358, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.073668956756592, |
|
"rewards/margins": 0.24783854186534882, |
|
"rewards/rejected": -2.321507692337036, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12968299711815562, |
|
"grad_norm": 26.431650447324927, |
|
"learning_rate": 2.1582733812949638e-08, |
|
"logits/chosen": -2.035036563873291, |
|
"logits/rejected": -2.0282251834869385, |
|
"logps/chosen": -1.0207078456878662, |
|
"logps/rejected": -1.1081664562225342, |
|
"loss": 1.6884, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0414156913757324, |
|
"rewards/margins": 0.17491717636585236, |
|
"rewards/rejected": -2.2163329124450684, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13688760806916425, |
|
"grad_norm": 25.864583918134127, |
|
"learning_rate": 2.278177458033573e-08, |
|
"logits/chosen": -2.0707240104675293, |
|
"logits/rejected": -2.06858229637146, |
|
"logps/chosen": -0.9697472453117371, |
|
"logps/rejected": -1.0662331581115723, |
|
"loss": 1.6531, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9394944906234741, |
|
"rewards/margins": 0.19297190010547638, |
|
"rewards/rejected": -2.1324663162231445, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1440922190201729, |
|
"grad_norm": 26.217250772833463, |
|
"learning_rate": 2.3980815347721823e-08, |
|
"logits/chosen": -2.042173385620117, |
|
"logits/rejected": -2.039220094680786, |
|
"logps/chosen": -1.0262937545776367, |
|
"logps/rejected": -1.1527516841888428, |
|
"loss": 1.62, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0525875091552734, |
|
"rewards/margins": 0.25291624665260315, |
|
"rewards/rejected": -2.3055033683776855, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15129682997118155, |
|
"grad_norm": 23.84528104787017, |
|
"learning_rate": 2.517985611510791e-08, |
|
"logits/chosen": -2.0345051288604736, |
|
"logits/rejected": -2.0316567420959473, |
|
"logps/chosen": -1.0741255283355713, |
|
"logps/rejected": -1.1507861614227295, |
|
"loss": 1.6946, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1482510566711426, |
|
"rewards/margins": 0.15332157909870148, |
|
"rewards/rejected": -2.301572322845459, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1585014409221902, |
|
"grad_norm": 18.025084777510834, |
|
"learning_rate": 2.6378896882494006e-08, |
|
"logits/chosen": -1.985107421875, |
|
"logits/rejected": -1.981000542640686, |
|
"logps/chosen": -1.0078728199005127, |
|
"logps/rejected": -1.1769083738327026, |
|
"loss": 1.572, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0157456398010254, |
|
"rewards/margins": 0.3380712866783142, |
|
"rewards/rejected": -2.3538167476654053, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16570605187319884, |
|
"grad_norm": 19.47784006510522, |
|
"learning_rate": 2.7577937649880097e-08, |
|
"logits/chosen": -2.020699977874756, |
|
"logits/rejected": -2.02105712890625, |
|
"logps/chosen": -1.0121351480484009, |
|
"logps/rejected": -1.1260082721710205, |
|
"loss": 1.6372, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0242702960968018, |
|
"rewards/margins": 0.22774633765220642, |
|
"rewards/rejected": -2.252016544342041, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 25.56764168439221, |
|
"learning_rate": 2.8776978417266184e-08, |
|
"logits/chosen": -2.0520577430725098, |
|
"logits/rejected": -2.047010660171509, |
|
"logps/chosen": -1.0614417791366577, |
|
"logps/rejected": -1.1393840312957764, |
|
"loss": 1.7026, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1228835582733154, |
|
"rewards/margins": 0.1558847576379776, |
|
"rewards/rejected": -2.2787680625915527, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18011527377521613, |
|
"grad_norm": 21.872217314499572, |
|
"learning_rate": 2.997601918465228e-08, |
|
"logits/chosen": -1.9690310955047607, |
|
"logits/rejected": -1.9652650356292725, |
|
"logps/chosen": -1.0821864604949951, |
|
"logps/rejected": -1.1734070777893066, |
|
"loss": 1.6765, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1643729209899902, |
|
"rewards/margins": 0.18244096636772156, |
|
"rewards/rejected": -2.3468141555786133, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1873198847262248, |
|
"grad_norm": 23.863727142615843, |
|
"learning_rate": 3.1175059952038366e-08, |
|
"logits/chosen": -1.9906234741210938, |
|
"logits/rejected": -1.9988559484481812, |
|
"logps/chosen": -1.1053721904754639, |
|
"logps/rejected": -1.215968370437622, |
|
"loss": 1.6495, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.2107443809509277, |
|
"rewards/margins": 0.221192866563797, |
|
"rewards/rejected": -2.431936740875244, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19452449567723343, |
|
"grad_norm": 23.628636477588664, |
|
"learning_rate": 3.237410071942446e-08, |
|
"logits/chosen": -2.0671885013580322, |
|
"logits/rejected": -2.05928635597229, |
|
"logps/chosen": -1.0713196992874146, |
|
"logps/rejected": -1.2007124423980713, |
|
"loss": 1.613, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.142639398574829, |
|
"rewards/margins": 0.25878530740737915, |
|
"rewards/rejected": -2.4014248847961426, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2017291066282421, |
|
"grad_norm": 28.940868349014373, |
|
"learning_rate": 3.3573141486810555e-08, |
|
"logits/chosen": -2.012786865234375, |
|
"logits/rejected": -2.0110411643981934, |
|
"logps/chosen": -0.9350563883781433, |
|
"logps/rejected": -1.0494303703308105, |
|
"loss": 1.6344, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.8701127767562866, |
|
"rewards/margins": 0.22874779999256134, |
|
"rewards/rejected": -2.098860740661621, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20893371757925072, |
|
"grad_norm": 24.54297495004221, |
|
"learning_rate": 3.477218225419664e-08, |
|
"logits/chosen": -2.043778657913208, |
|
"logits/rejected": -2.0457494258880615, |
|
"logps/chosen": -1.013352870941162, |
|
"logps/rejected": -1.108412504196167, |
|
"loss": 1.673, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.026705741882324, |
|
"rewards/margins": 0.19011931121349335, |
|
"rewards/rejected": -2.216825008392334, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21613832853025935, |
|
"grad_norm": 23.81150794802503, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -2.0253958702087402, |
|
"logits/rejected": -2.0171494483947754, |
|
"logps/chosen": -1.0900764465332031, |
|
"logps/rejected": -1.1909259557724, |
|
"loss": 1.6538, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1801528930664062, |
|
"rewards/margins": 0.20169894397258759, |
|
"rewards/rejected": -2.3818519115448, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22334293948126802, |
|
"grad_norm": 21.273729184669797, |
|
"learning_rate": 3.717026378896883e-08, |
|
"logits/chosen": -1.9567492008209229, |
|
"logits/rejected": -1.9567224979400635, |
|
"logps/chosen": -1.0873463153839111, |
|
"logps/rejected": -1.1727124452590942, |
|
"loss": 1.6796, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1746926307678223, |
|
"rewards/margins": 0.1707322895526886, |
|
"rewards/rejected": -2.3454248905181885, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 18.981062829330995, |
|
"learning_rate": 3.836930455635491e-08, |
|
"logits/chosen": -2.0308775901794434, |
|
"logits/rejected": -2.0224335193634033, |
|
"logps/chosen": -1.0090181827545166, |
|
"logps/rejected": -1.1410481929779053, |
|
"loss": 1.6236, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.018036365509033, |
|
"rewards/margins": 0.26405996084213257, |
|
"rewards/rejected": -2.2820963859558105, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2377521613832853, |
|
"grad_norm": 18.120122122389997, |
|
"learning_rate": 3.9568345323741003e-08, |
|
"logits/chosen": -2.0095202922821045, |
|
"logits/rejected": -2.011646270751953, |
|
"logps/chosen": -1.0461695194244385, |
|
"logps/rejected": -1.0691479444503784, |
|
"loss": 1.7906, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -2.092339038848877, |
|
"rewards/margins": 0.045956991612911224, |
|
"rewards/rejected": -2.138295888900757, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24495677233429394, |
|
"grad_norm": 21.704181292006204, |
|
"learning_rate": 4.07673860911271e-08, |
|
"logits/chosen": -2.0557103157043457, |
|
"logits/rejected": -2.049830675125122, |
|
"logps/chosen": -1.0874840021133423, |
|
"logps/rejected": -1.1680512428283691, |
|
"loss": 1.6811, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1749680042266846, |
|
"rewards/margins": 0.16113446652889252, |
|
"rewards/rejected": -2.3361024856567383, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2521613832853026, |
|
"grad_norm": 21.71777146771956, |
|
"learning_rate": 4.1966426858513185e-08, |
|
"logits/chosen": -1.9898334741592407, |
|
"logits/rejected": -1.984086275100708, |
|
"logps/chosen": -0.9887999296188354, |
|
"logps/rejected": -1.1148191690444946, |
|
"loss": 1.6204, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.977599859237671, |
|
"rewards/margins": 0.25203877687454224, |
|
"rewards/rejected": -2.2296383380889893, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25936599423631124, |
|
"grad_norm": 24.253364705877487, |
|
"learning_rate": 4.3165467625899276e-08, |
|
"logits/chosen": -1.9987952709197998, |
|
"logits/rejected": -1.994927167892456, |
|
"logps/chosen": -1.0863444805145264, |
|
"logps/rejected": -1.2027360200881958, |
|
"loss": 1.6318, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.1726889610290527, |
|
"rewards/margins": 0.23278315365314484, |
|
"rewards/rejected": -2.4054720401763916, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2665706051873199, |
|
"grad_norm": 21.124442462099225, |
|
"learning_rate": 4.4364508393285374e-08, |
|
"logits/chosen": -2.0051770210266113, |
|
"logits/rejected": -2.0051817893981934, |
|
"logps/chosen": -1.0520938634872437, |
|
"logps/rejected": -1.1808488368988037, |
|
"loss": 1.61, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1041877269744873, |
|
"rewards/margins": 0.2575102150440216, |
|
"rewards/rejected": -2.3616976737976074, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2737752161383285, |
|
"grad_norm": 18.81450442447697, |
|
"learning_rate": 4.556354916067146e-08, |
|
"logits/chosen": -2.033322811126709, |
|
"logits/rejected": -2.037569522857666, |
|
"logps/chosen": -1.0127990245819092, |
|
"logps/rejected": -1.0860477685928345, |
|
"loss": 1.7123, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0255980491638184, |
|
"rewards/margins": 0.14649739861488342, |
|
"rewards/rejected": -2.172095537185669, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28097982708933716, |
|
"grad_norm": 18.05994016141799, |
|
"learning_rate": 4.676258992805755e-08, |
|
"logits/chosen": -2.030771255493164, |
|
"logits/rejected": -2.024479866027832, |
|
"logps/chosen": -1.022430419921875, |
|
"logps/rejected": -1.1492633819580078, |
|
"loss": 1.6144, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.04486083984375, |
|
"rewards/margins": 0.25366589426994324, |
|
"rewards/rejected": -2.2985267639160156, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 21.925695715915456, |
|
"learning_rate": 4.796163069544365e-08, |
|
"logits/chosen": -2.0310251712799072, |
|
"logits/rejected": -2.031461238861084, |
|
"logps/chosen": -0.9957631230354309, |
|
"logps/rejected": -1.0482515096664429, |
|
"loss": 1.7233, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.9915262460708618, |
|
"rewards/margins": 0.10497663915157318, |
|
"rewards/rejected": -2.0965030193328857, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2953890489913545, |
|
"grad_norm": 21.245122356914116, |
|
"learning_rate": 4.916067146282973e-08, |
|
"logits/chosen": -2.03194260597229, |
|
"logits/rejected": -2.029832601547241, |
|
"logps/chosen": -1.0741499662399292, |
|
"logps/rejected": -1.1456435918807983, |
|
"loss": 1.7051, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1482999324798584, |
|
"rewards/margins": 0.14298732578754425, |
|
"rewards/rejected": -2.2912871837615967, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3025936599423631, |
|
"grad_norm": 19.413173771495842, |
|
"learning_rate": 4.999992091672379e-08, |
|
"logits/chosen": -2.0077686309814453, |
|
"logits/rejected": -2.0120043754577637, |
|
"logps/chosen": -1.0452171564102173, |
|
"logps/rejected": -1.1237493753433228, |
|
"loss": 1.687, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0904343128204346, |
|
"rewards/margins": 0.1570647954940796, |
|
"rewards/rejected": -2.2474987506866455, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30979827089337175, |
|
"grad_norm": 21.179286310681857, |
|
"learning_rate": 4.999851500573209e-08, |
|
"logits/chosen": -1.9912792444229126, |
|
"logits/rejected": -1.992300271987915, |
|
"logps/chosen": -1.0587327480316162, |
|
"logps/rejected": -1.0991578102111816, |
|
"loss": 1.7543, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -2.1174654960632324, |
|
"rewards/margins": 0.08085022121667862, |
|
"rewards/rejected": -2.1983156204223633, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3170028818443804, |
|
"grad_norm": 18.636072992550215, |
|
"learning_rate": 4.999535180235972e-08, |
|
"logits/chosen": -1.9881870746612549, |
|
"logits/rejected": -1.9882609844207764, |
|
"logps/chosen": -1.0215884447097778, |
|
"logps/rejected": -1.1435730457305908, |
|
"loss": 1.6294, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0431768894195557, |
|
"rewards/margins": 0.24396944046020508, |
|
"rewards/rejected": -2.2871460914611816, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3242074927953891, |
|
"grad_norm": 20.3863101378829, |
|
"learning_rate": 4.9990431528966836e-08, |
|
"logits/chosen": -2.011289119720459, |
|
"logits/rejected": -2.0077245235443115, |
|
"logps/chosen": -1.1449435949325562, |
|
"logps/rejected": -1.1851942539215088, |
|
"loss": 1.7534, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.2898871898651123, |
|
"rewards/margins": 0.08050137013196945, |
|
"rewards/rejected": -2.3703885078430176, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3314121037463977, |
|
"grad_norm": 28.415794251101588, |
|
"learning_rate": 4.9983754531428326e-08, |
|
"logits/chosen": -2.0104928016662598, |
|
"logits/rejected": -2.004953145980835, |
|
"logps/chosen": -1.1698896884918213, |
|
"logps/rejected": -1.287090539932251, |
|
"loss": 1.6357, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.3397793769836426, |
|
"rewards/margins": 0.23440217971801758, |
|
"rewards/rejected": -2.574181079864502, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33861671469740634, |
|
"grad_norm": 26.13115889759558, |
|
"learning_rate": 4.997532127910954e-08, |
|
"logits/chosen": -2.0472846031188965, |
|
"logits/rejected": -2.0352375507354736, |
|
"logps/chosen": -1.0996732711791992, |
|
"logps/rejected": -1.2017238140106201, |
|
"loss": 1.657, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1993465423583984, |
|
"rewards/margins": 0.20410099625587463, |
|
"rewards/rejected": -2.4034476280212402, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 24.8300011681321, |
|
"learning_rate": 4.996513236483331e-08, |
|
"logits/chosen": -2.0970568656921387, |
|
"logits/rejected": -2.0868287086486816, |
|
"logps/chosen": -0.9850471615791321, |
|
"logps/rejected": -1.1070995330810547, |
|
"loss": 1.6196, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9700943231582642, |
|
"rewards/margins": 0.24410471320152283, |
|
"rewards/rejected": -2.2141990661621094, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3530259365994236, |
|
"grad_norm": 21.36422719755768, |
|
"learning_rate": 4.9953188504838225e-08, |
|
"logits/chosen": -2.0197677612304688, |
|
"logits/rejected": -2.0189919471740723, |
|
"logps/chosen": -0.9872976541519165, |
|
"logps/rejected": -1.1019771099090576, |
|
"loss": 1.6297, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.974595308303833, |
|
"rewards/margins": 0.22935882210731506, |
|
"rewards/rejected": -2.2039542198181152, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.36023054755043227, |
|
"grad_norm": 20.790629252352424, |
|
"learning_rate": 4.993949053872834e-08, |
|
"logits/chosen": -2.020406484603882, |
|
"logits/rejected": -2.0069644451141357, |
|
"logps/chosen": -1.0123459100723267, |
|
"logps/rejected": -1.1401115655899048, |
|
"loss": 1.6134, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0246918201446533, |
|
"rewards/margins": 0.2555309236049652, |
|
"rewards/rejected": -2.2802231311798096, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36743515850144093, |
|
"grad_norm": 21.81997744622936, |
|
"learning_rate": 4.9924039429414086e-08, |
|
"logits/chosen": -2.092318058013916, |
|
"logits/rejected": -2.085855722427368, |
|
"logps/chosen": -1.0440946817398071, |
|
"logps/rejected": -1.157952070236206, |
|
"loss": 1.6424, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0881893634796143, |
|
"rewards/margins": 0.22771528363227844, |
|
"rewards/rejected": -2.315904140472412, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3746397694524496, |
|
"grad_norm": 18.836919296616202, |
|
"learning_rate": 4.990683626304467e-08, |
|
"logits/chosen": -2.0161261558532715, |
|
"logits/rejected": -2.0146827697753906, |
|
"logps/chosen": -1.106687068939209, |
|
"logps/rejected": -1.2028759717941284, |
|
"loss": 1.6614, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.213374137878418, |
|
"rewards/margins": 0.19237776100635529, |
|
"rewards/rejected": -2.405751943588257, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3818443804034582, |
|
"grad_norm": 19.949593538287314, |
|
"learning_rate": 4.9887882248931646e-08, |
|
"logits/chosen": -1.9806255102157593, |
|
"logits/rejected": -1.9708988666534424, |
|
"logps/chosen": -0.9840561151504517, |
|
"logps/rejected": -1.0613772869110107, |
|
"loss": 1.6935, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9681122303009033, |
|
"rewards/margins": 0.15464219450950623, |
|
"rewards/rejected": -2.1227545738220215, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38904899135446686, |
|
"grad_norm": 25.373571489691113, |
|
"learning_rate": 4.986717871946393e-08, |
|
"logits/chosen": -2.001861333847046, |
|
"logits/rejected": -1.9949573278427124, |
|
"logps/chosen": -1.0307199954986572, |
|
"logps/rejected": -1.1325619220733643, |
|
"loss": 1.6593, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0614399909973145, |
|
"rewards/margins": 0.20368380844593048, |
|
"rewards/rejected": -2.2651238441467285, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3962536023054755, |
|
"grad_norm": 19.975615437676616, |
|
"learning_rate": 4.984472713001416e-08, |
|
"logits/chosen": -1.9626245498657227, |
|
"logits/rejected": -1.9631439447402954, |
|
"logps/chosen": -0.9997963905334473, |
|
"logps/rejected": -1.0775134563446045, |
|
"loss": 1.709, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.9995927810668945, |
|
"rewards/margins": 0.15543393790721893, |
|
"rewards/rejected": -2.155026912689209, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 19.622919568600462, |
|
"learning_rate": 4.982052905883637e-08, |
|
"logits/chosen": -2.0312600135803223, |
|
"logits/rejected": -2.031934976577759, |
|
"logps/chosen": -1.080683708190918, |
|
"logps/rejected": -1.1808573007583618, |
|
"loss": 1.6634, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.161367416381836, |
|
"rewards/margins": 0.20034709572792053, |
|
"rewards/rejected": -2.3617146015167236, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4106628242074928, |
|
"grad_norm": 18.11727668329915, |
|
"learning_rate": 4.979458620695505e-08, |
|
"logits/chosen": -2.02905011177063, |
|
"logits/rejected": -2.0147969722747803, |
|
"logps/chosen": -1.094179391860962, |
|
"logps/rejected": -1.2081434726715088, |
|
"loss": 1.6429, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.188358783721924, |
|
"rewards/margins": 0.22792859375476837, |
|
"rewards/rejected": -2.4162869453430176, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41786743515850144, |
|
"grad_norm": 21.724257832681683, |
|
"learning_rate": 4.976690039804555e-08, |
|
"logits/chosen": -2.0302157402038574, |
|
"logits/rejected": -2.0284671783447266, |
|
"logps/chosen": -0.9871706962585449, |
|
"logps/rejected": -1.0677026510238647, |
|
"loss": 1.6891, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9743413925170898, |
|
"rewards/margins": 0.1610640585422516, |
|
"rewards/rejected": -2.1354053020477295, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4250720461095101, |
|
"grad_norm": 24.00283287273116, |
|
"learning_rate": 4.973747357830592e-08, |
|
"logits/chosen": -2.019152879714966, |
|
"logits/rejected": -2.0193400382995605, |
|
"logps/chosen": -1.0266475677490234, |
|
"logps/rejected": -1.1648699045181274, |
|
"loss": 1.5973, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.053295135498047, |
|
"rewards/margins": 0.27644452452659607, |
|
"rewards/rejected": -2.329739809036255, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4322766570605187, |
|
"grad_norm": 22.405917861477768, |
|
"learning_rate": 4.970630781632009e-08, |
|
"logits/chosen": -2.075407028198242, |
|
"logits/rejected": -2.0713820457458496, |
|
"logps/chosen": -1.032801628112793, |
|
"logps/rejected": -1.174425721168518, |
|
"loss": 1.5984, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.065603256225586, |
|
"rewards/margins": 0.2832481861114502, |
|
"rewards/rejected": -2.348851442337036, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43948126801152737, |
|
"grad_norm": 24.34047631979251, |
|
"learning_rate": 4.967340530291242e-08, |
|
"logits/chosen": -2.0268642902374268, |
|
"logits/rejected": -2.016897678375244, |
|
"logps/chosen": -1.0920665264129639, |
|
"logps/rejected": -1.1507337093353271, |
|
"loss": 1.7166, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1841330528259277, |
|
"rewards/margins": 0.11733441054821014, |
|
"rewards/rejected": -2.3014674186706543, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44668587896253603, |
|
"grad_norm": 28.014312030744783, |
|
"learning_rate": 4.9638768350993755e-08, |
|
"logits/chosen": -2.026273250579834, |
|
"logits/rejected": -2.019042491912842, |
|
"logps/chosen": -0.9955148696899414, |
|
"logps/rejected": -1.082495927810669, |
|
"loss": 1.6773, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.9910297393798828, |
|
"rewards/margins": 0.17396244406700134, |
|
"rewards/rejected": -2.164991855621338, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4538904899135447, |
|
"grad_norm": 22.866148444031516, |
|
"learning_rate": 4.9602399395398786e-08, |
|
"logits/chosen": -2.0437943935394287, |
|
"logits/rejected": -2.0437843799591064, |
|
"logps/chosen": -1.0265557765960693, |
|
"logps/rejected": -1.1544904708862305, |
|
"loss": 1.6162, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0531115531921387, |
|
"rewards/margins": 0.25586965680122375, |
|
"rewards/rejected": -2.308980941772461, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 18.890515150130014, |
|
"learning_rate": 4.9564300992714914e-08, |
|
"logits/chosen": -1.957597017288208, |
|
"logits/rejected": -1.9585049152374268, |
|
"logps/chosen": -1.0095851421356201, |
|
"logps/rejected": -1.11627995967865, |
|
"loss": 1.6467, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0191702842712402, |
|
"rewards/margins": 0.21338967978954315, |
|
"rewards/rejected": -2.2325599193573, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46829971181556196, |
|
"grad_norm": 24.37442657575504, |
|
"learning_rate": 4.952447582110253e-08, |
|
"logits/chosen": -2.0568816661834717, |
|
"logits/rejected": -2.042471408843994, |
|
"logps/chosen": -1.0371555089950562, |
|
"logps/rejected": -1.1173784732818604, |
|
"loss": 1.6904, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.0743110179901123, |
|
"rewards/margins": 0.16044630110263824, |
|
"rewards/rejected": -2.2347569465637207, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4755043227665706, |
|
"grad_norm": 26.422335691665552, |
|
"learning_rate": 4.948292668010676e-08, |
|
"logits/chosen": -2.032116174697876, |
|
"logits/rejected": -2.0331506729125977, |
|
"logps/chosen": -1.0876410007476807, |
|
"logps/rejected": -1.174712896347046, |
|
"loss": 1.6846, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1752820014953613, |
|
"rewards/margins": 0.17414382100105286, |
|
"rewards/rejected": -2.349425792694092, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4827089337175792, |
|
"grad_norm": 24.027322225917015, |
|
"learning_rate": 4.943965649046064e-08, |
|
"logits/chosen": -2.005026340484619, |
|
"logits/rejected": -1.9956611394882202, |
|
"logps/chosen": -1.0621442794799805, |
|
"logps/rejected": -1.166245460510254, |
|
"loss": 1.6535, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.124288558959961, |
|
"rewards/margins": 0.2082025706768036, |
|
"rewards/rejected": -2.332490921020508, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4899135446685879, |
|
"grad_norm": 21.807782406029403, |
|
"learning_rate": 4.9394668293879835e-08, |
|
"logits/chosen": -1.960857629776001, |
|
"logits/rejected": -1.9516347646713257, |
|
"logps/chosen": -1.0361614227294922, |
|
"logps/rejected": -1.1057939529418945, |
|
"loss": 1.7073, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0723228454589844, |
|
"rewards/margins": 0.13926495611667633, |
|
"rewards/rejected": -2.211587905883789, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.49711815561959655, |
|
"grad_norm": 29.962067466825946, |
|
"learning_rate": 4.93479652528488e-08, |
|
"logits/chosen": -2.0200858116149902, |
|
"logits/rejected": -2.0147900581359863, |
|
"logps/chosen": -1.1034678220748901, |
|
"logps/rejected": -1.209357500076294, |
|
"loss": 1.6608, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.2069356441497803, |
|
"rewards/margins": 0.21177926659584045, |
|
"rewards/rejected": -2.418715000152588, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5043227665706052, |
|
"grad_norm": 23.065678595280374, |
|
"learning_rate": 4.929955065039848e-08, |
|
"logits/chosen": -2.019347667694092, |
|
"logits/rejected": -2.0137135982513428, |
|
"logps/chosen": -1.0183324813842773, |
|
"logps/rejected": -1.151181936264038, |
|
"loss": 1.6148, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0366649627685547, |
|
"rewards/margins": 0.2656988203525543, |
|
"rewards/rejected": -2.302363872528076, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5115273775216138, |
|
"grad_norm": 21.89139209126973, |
|
"learning_rate": 4.92494278898755e-08, |
|
"logits/chosen": -1.9896736145019531, |
|
"logits/rejected": -1.9866260290145874, |
|
"logps/chosen": -0.8968732953071594, |
|
"logps/rejected": -1.0222662687301636, |
|
"loss": 1.6288, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.7937465906143188, |
|
"rewards/margins": 0.2507862448692322, |
|
"rewards/rejected": -2.044532537460327, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 21.896749853278514, |
|
"learning_rate": 4.9197600494702955e-08, |
|
"logits/chosen": -2.0112369060516357, |
|
"logits/rejected": -2.005025625228882, |
|
"logps/chosen": -1.0415465831756592, |
|
"logps/rejected": -1.1650935411453247, |
|
"loss": 1.6193, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0830931663513184, |
|
"rewards/margins": 0.24709336459636688, |
|
"rewards/rejected": -2.3301870822906494, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5259365994236311, |
|
"grad_norm": 23.234379865312263, |
|
"learning_rate": 4.9144072108132725e-08, |
|
"logits/chosen": -2.014512300491333, |
|
"logits/rejected": -2.003385305404663, |
|
"logps/chosen": -1.0214173793792725, |
|
"logps/rejected": -1.1051766872406006, |
|
"loss": 1.69, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.042834758758545, |
|
"rewards/margins": 0.16751877963542938, |
|
"rewards/rejected": -2.210353374481201, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5331412103746398, |
|
"grad_norm": 20.28090680157763, |
|
"learning_rate": 4.908884649298937e-08, |
|
"logits/chosen": -1.9976609945297241, |
|
"logits/rejected": -2.004554271697998, |
|
"logps/chosen": -1.0181934833526611, |
|
"logps/rejected": -1.079618215560913, |
|
"loss": 1.7294, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -2.0363869667053223, |
|
"rewards/margins": 0.12284936755895615, |
|
"rewards/rejected": -2.159236431121826, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5403458213256485, |
|
"grad_norm": 26.163151024624415, |
|
"learning_rate": 4.903192753140557e-08, |
|
"logits/chosen": -2.0144410133361816, |
|
"logits/rejected": -2.009019374847412, |
|
"logps/chosen": -1.1000624895095825, |
|
"logps/rejected": -1.1905133724212646, |
|
"loss": 1.6789, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.200124979019165, |
|
"rewards/margins": 0.18090197443962097, |
|
"rewards/rejected": -2.3810267448425293, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.547550432276657, |
|
"grad_norm": 22.741665187331794, |
|
"learning_rate": 4.897331922454931e-08, |
|
"logits/chosen": -1.9788360595703125, |
|
"logits/rejected": -1.9825944900512695, |
|
"logps/chosen": -1.003287672996521, |
|
"logps/rejected": -1.1135233640670776, |
|
"loss": 1.6517, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.006575345993042, |
|
"rewards/margins": 0.22047185897827148, |
|
"rewards/rejected": -2.2270467281341553, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5547550432276657, |
|
"grad_norm": 24.61264144766733, |
|
"learning_rate": 4.891302569234256e-08, |
|
"logits/chosen": -1.9734195470809937, |
|
"logits/rejected": -1.9761135578155518, |
|
"logps/chosen": -0.976268470287323, |
|
"logps/rejected": -1.1288354396820068, |
|
"loss": 1.5905, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.952536940574646, |
|
"rewards/margins": 0.30513399839401245, |
|
"rewards/rejected": -2.2576708793640137, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5619596541786743, |
|
"grad_norm": 25.449879441421576, |
|
"learning_rate": 4.8851051173171656e-08, |
|
"logits/chosen": -1.992964506149292, |
|
"logits/rejected": -1.9914066791534424, |
|
"logps/chosen": -1.039645791053772, |
|
"logps/rejected": -1.1218674182891846, |
|
"loss": 1.682, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.079291582107544, |
|
"rewards/margins": 0.1644435077905655, |
|
"rewards/rejected": -2.243734836578369, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.569164265129683, |
|
"grad_norm": 19.97642043492772, |
|
"learning_rate": 4.87874000235894e-08, |
|
"logits/chosen": -2.013441801071167, |
|
"logits/rejected": -2.0076568126678467, |
|
"logps/chosen": -1.075208306312561, |
|
"logps/rejected": -1.23297119140625, |
|
"loss": 1.5847, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.150416612625122, |
|
"rewards/margins": 0.31552577018737793, |
|
"rewards/rejected": -2.4659423828125, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 21.789770158220232, |
|
"learning_rate": 4.872207671800876e-08, |
|
"logits/chosen": -2.0366737842559814, |
|
"logits/rejected": -2.03296160697937, |
|
"logps/chosen": -1.0434991121292114, |
|
"logps/rejected": -1.122003197669983, |
|
"loss": 1.6989, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.086998224258423, |
|
"rewards/margins": 0.15700823068618774, |
|
"rewards/rejected": -2.244006395339966, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5835734870317003, |
|
"grad_norm": 18.594763027326536, |
|
"learning_rate": 4.865508584838841e-08, |
|
"logits/chosen": -2.018526315689087, |
|
"logits/rejected": -2.0210933685302734, |
|
"logps/chosen": -1.0125329494476318, |
|
"logps/rejected": -1.1024024486541748, |
|
"loss": 1.6743, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0250658988952637, |
|
"rewards/margins": 0.17973873019218445, |
|
"rewards/rejected": -2.2048048973083496, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.590778097982709, |
|
"grad_norm": 24.248436921729628, |
|
"learning_rate": 4.858643212390985e-08, |
|
"logits/chosen": -2.01918363571167, |
|
"logits/rejected": -2.009265422821045, |
|
"logps/chosen": -1.0285472869873047, |
|
"logps/rejected": -1.115457534790039, |
|
"loss": 1.6899, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0570945739746094, |
|
"rewards/margins": 0.17382054030895233, |
|
"rewards/rejected": -2.230915069580078, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5979827089337176, |
|
"grad_norm": 21.134388800838927, |
|
"learning_rate": 4.851612037064643e-08, |
|
"logits/chosen": -2.0015549659729004, |
|
"logits/rejected": -1.9994051456451416, |
|
"logps/chosen": -0.9596086740493774, |
|
"logps/rejected": -1.0798084735870361, |
|
"loss": 1.6365, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9192173480987549, |
|
"rewards/margins": 0.2403998076915741, |
|
"rewards/rejected": -2.1596169471740723, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6051873198847262, |
|
"grad_norm": 18.585378419640243, |
|
"learning_rate": 4.8444155531224065e-08, |
|
"logits/chosen": -2.0270018577575684, |
|
"logits/rejected": -2.0270333290100098, |
|
"logps/chosen": -1.0868253707885742, |
|
"logps/rejected": -1.1600069999694824, |
|
"loss": 1.705, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1736507415771484, |
|
"rewards/margins": 0.14636364579200745, |
|
"rewards/rejected": -2.320013999938965, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6123919308357348, |
|
"grad_norm": 18.461630690476433, |
|
"learning_rate": 4.8370542664473805e-08, |
|
"logits/chosen": -2.034163236618042, |
|
"logits/rejected": -2.028323173522949, |
|
"logps/chosen": -1.049387812614441, |
|
"logps/rejected": -1.1546533107757568, |
|
"loss": 1.6593, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.098775625228882, |
|
"rewards/margins": 0.21053071320056915, |
|
"rewards/rejected": -2.3093066215515137, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6195965417867435, |
|
"grad_norm": 20.644121007821905, |
|
"learning_rate": 4.829528694507624e-08, |
|
"logits/chosen": -2.0090110301971436, |
|
"logits/rejected": -2.0050089359283447, |
|
"logps/chosen": -1.161203384399414, |
|
"logps/rejected": -1.2181370258331299, |
|
"loss": 1.7269, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.322406768798828, |
|
"rewards/margins": 0.11386724561452866, |
|
"rewards/rejected": -2.4362740516662598, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6268011527377522, |
|
"grad_norm": 22.99115482206478, |
|
"learning_rate": 4.821839366319768e-08, |
|
"logits/chosen": -2.0472781658172607, |
|
"logits/rejected": -2.041145086288452, |
|
"logps/chosen": -1.0039831399917603, |
|
"logps/rejected": -1.1226513385772705, |
|
"loss": 1.6312, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0079662799835205, |
|
"rewards/margins": 0.23733630776405334, |
|
"rewards/rejected": -2.245302677154541, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 23.509214819589577, |
|
"learning_rate": 4.813986822411833e-08, |
|
"logits/chosen": -2.0386948585510254, |
|
"logits/rejected": -2.036702871322632, |
|
"logps/chosen": -1.0144814252853394, |
|
"logps/rejected": -1.0797975063323975, |
|
"loss": 1.7131, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0289628505706787, |
|
"rewards/margins": 0.13063256442546844, |
|
"rewards/rejected": -2.159595012664795, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6412103746397695, |
|
"grad_norm": 21.898531418971658, |
|
"learning_rate": 4.805971614785231e-08, |
|
"logits/chosen": -2.0628480911254883, |
|
"logits/rejected": -2.0612945556640625, |
|
"logps/chosen": -1.0150998830795288, |
|
"logps/rejected": -1.1112868785858154, |
|
"loss": 1.66, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0301997661590576, |
|
"rewards/margins": 0.19237405061721802, |
|
"rewards/rejected": -2.222573757171631, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6484149855907781, |
|
"grad_norm": 23.49291997665142, |
|
"learning_rate": 4.797794306875963e-08, |
|
"logits/chosen": -1.9782159328460693, |
|
"logits/rejected": -1.9796749353408813, |
|
"logps/chosen": -1.141379952430725, |
|
"logps/rejected": -1.2147481441497803, |
|
"loss": 1.7099, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.28275990486145, |
|
"rewards/margins": 0.14673666656017303, |
|
"rewards/rejected": -2.4294962882995605, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6556195965417867, |
|
"grad_norm": 22.767778132803752, |
|
"learning_rate": 4.7894554735150076e-08, |
|
"logits/chosen": -1.980843186378479, |
|
"logits/rejected": -1.9842725992202759, |
|
"logps/chosen": -1.0420039892196655, |
|
"logps/rejected": -1.1086304187774658, |
|
"loss": 1.7087, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.084007978439331, |
|
"rewards/margins": 0.13325299322605133, |
|
"rewards/rejected": -2.2172608375549316, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6628242074927954, |
|
"grad_norm": 25.17340393441993, |
|
"learning_rate": 4.7809557008879185e-08, |
|
"logits/chosen": -2.017960548400879, |
|
"logits/rejected": -2.01263165473938, |
|
"logps/chosen": -0.9720123410224915, |
|
"logps/rejected": -1.0615627765655518, |
|
"loss": 1.6769, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.944024682044983, |
|
"rewards/margins": 0.17910084128379822, |
|
"rewards/rejected": -2.1231255531311035, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.670028818443804, |
|
"grad_norm": 20.599563542622022, |
|
"learning_rate": 4.772295586493613e-08, |
|
"logits/chosen": -2.0555853843688965, |
|
"logits/rejected": -2.0527231693267822, |
|
"logps/chosen": -1.0331850051879883, |
|
"logps/rejected": -1.151185393333435, |
|
"loss": 1.6268, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0663700103759766, |
|
"rewards/margins": 0.23600046336650848, |
|
"rewards/rejected": -2.30237078666687, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6772334293948127, |
|
"grad_norm": 22.356981982643983, |
|
"learning_rate": 4.763475739102374e-08, |
|
"logits/chosen": -2.0080509185791016, |
|
"logits/rejected": -2.014037609100342, |
|
"logps/chosen": -1.1266523599624634, |
|
"logps/rejected": -1.1936461925506592, |
|
"loss": 1.7037, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.2533047199249268, |
|
"rewards/margins": 0.13398754596710205, |
|
"rewards/rejected": -2.3872923851013184, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6844380403458213, |
|
"grad_norm": 18.146033211795125, |
|
"learning_rate": 4.754496778713054e-08, |
|
"logits/chosen": -1.9653065204620361, |
|
"logits/rejected": -1.9694888591766357, |
|
"logps/chosen": -1.011007308959961, |
|
"logps/rejected": -1.1335813999176025, |
|
"loss": 1.6339, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.022014617919922, |
|
"rewards/margins": 0.24514833092689514, |
|
"rewards/rejected": -2.267162799835205, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 23.944663141260857, |
|
"learning_rate": 4.7453593365094926e-08, |
|
"logits/chosen": -2.041576862335205, |
|
"logits/rejected": -2.0405728816986084, |
|
"logps/chosen": -1.0485351085662842, |
|
"logps/rejected": -1.1592615842819214, |
|
"loss": 1.6439, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0970702171325684, |
|
"rewards/margins": 0.221452996134758, |
|
"rewards/rejected": -2.3185231685638428, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6988472622478387, |
|
"grad_norm": 24.526469520821017, |
|
"learning_rate": 4.736064054816145e-08, |
|
"logits/chosen": -2.042876720428467, |
|
"logits/rejected": -2.03905987739563, |
|
"logps/chosen": -0.967331051826477, |
|
"logps/rejected": -1.0941224098205566, |
|
"loss": 1.6128, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.934662103652954, |
|
"rewards/margins": 0.2535827159881592, |
|
"rewards/rejected": -2.1882448196411133, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7060518731988472, |
|
"grad_norm": 20.19357580628468, |
|
"learning_rate": 4.726611587052933e-08, |
|
"logits/chosen": -1.970882773399353, |
|
"logits/rejected": -1.970483422279358, |
|
"logps/chosen": -1.1071960926055908, |
|
"logps/rejected": -1.2356629371643066, |
|
"loss": 1.6116, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.2143921852111816, |
|
"rewards/margins": 0.25693362951278687, |
|
"rewards/rejected": -2.4713258743286133, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7132564841498559, |
|
"grad_norm": 25.984367596777485, |
|
"learning_rate": 4.71700259768931e-08, |
|
"logits/chosen": -2.0280659198760986, |
|
"logits/rejected": -2.0252366065979004, |
|
"logps/chosen": -1.1083245277404785, |
|
"logps/rejected": -1.20613694190979, |
|
"loss": 1.6704, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.216649055480957, |
|
"rewards/margins": 0.1956244558095932, |
|
"rewards/rejected": -2.41227388381958, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7204610951008645, |
|
"grad_norm": 22.322580503288012, |
|
"learning_rate": 4.707237762197549e-08, |
|
"logits/chosen": -2.0082552433013916, |
|
"logits/rejected": -2.0052361488342285, |
|
"logps/chosen": -1.0064256191253662, |
|
"logps/rejected": -1.1272127628326416, |
|
"loss": 1.6432, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0128512382507324, |
|
"rewards/margins": 0.2415740191936493, |
|
"rewards/rejected": -2.254425525665283, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7276657060518732, |
|
"grad_norm": 26.980563216893835, |
|
"learning_rate": 4.697317767005265e-08, |
|
"logits/chosen": -2.0239129066467285, |
|
"logits/rejected": -2.0204639434814453, |
|
"logps/chosen": -1.0004609823226929, |
|
"logps/rejected": -1.0935966968536377, |
|
"loss": 1.6934, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0009219646453857, |
|
"rewards/margins": 0.1862715780735016, |
|
"rewards/rejected": -2.1871933937072754, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7348703170028819, |
|
"grad_norm": 20.718520630802683, |
|
"learning_rate": 4.6872433094471577e-08, |
|
"logits/chosen": -2.0184829235076904, |
|
"logits/rejected": -2.0136635303497314, |
|
"logps/chosen": -1.0312004089355469, |
|
"logps/rejected": -1.1273009777069092, |
|
"loss": 1.6531, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0624008178710938, |
|
"rewards/margins": 0.19220107793807983, |
|
"rewards/rejected": -2.2546019554138184, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7420749279538905, |
|
"grad_norm": 20.062163691159576, |
|
"learning_rate": 4.677015097715994e-08, |
|
"logits/chosen": -1.9711856842041016, |
|
"logits/rejected": -1.970653772354126, |
|
"logps/chosen": -1.020525574684143, |
|
"logps/rejected": -1.1553295850753784, |
|
"loss": 1.6278, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.041051149368286, |
|
"rewards/margins": 0.2696080803871155, |
|
"rewards/rejected": -2.310659170150757, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 19.97083818571589, |
|
"learning_rate": 4.666633850812825e-08, |
|
"logits/chosen": -2.0189290046691895, |
|
"logits/rejected": -2.0129802227020264, |
|
"logps/chosen": -1.0118273496627808, |
|
"logps/rejected": -1.0940511226654053, |
|
"loss": 1.6809, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0236546993255615, |
|
"rewards/margins": 0.16444769501686096, |
|
"rewards/rejected": -2.1881022453308105, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7564841498559077, |
|
"grad_norm": 21.00137849840187, |
|
"learning_rate": 4.656100298496439e-08, |
|
"logits/chosen": -1.967718482017517, |
|
"logits/rejected": -1.9640108346939087, |
|
"logps/chosen": -0.9364064931869507, |
|
"logps/rejected": -1.068771481513977, |
|
"loss": 1.615, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8728129863739014, |
|
"rewards/margins": 0.2647300064563751, |
|
"rewards/rejected": -2.137542963027954, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7636887608069164, |
|
"grad_norm": 21.497115116059465, |
|
"learning_rate": 4.6454151812320715e-08, |
|
"logits/chosen": -1.9955333471298218, |
|
"logits/rejected": -1.989284873008728, |
|
"logps/chosen": -1.0373084545135498, |
|
"logps/rejected": -1.1469610929489136, |
|
"loss": 1.6514, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0746169090270996, |
|
"rewards/margins": 0.21930520236492157, |
|
"rewards/rejected": -2.293922185897827, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.770893371757925, |
|
"grad_norm": 23.26552290797272, |
|
"learning_rate": 4.6345792501393434e-08, |
|
"logits/chosen": -2.0028505325317383, |
|
"logits/rejected": -2.001188278198242, |
|
"logps/chosen": -1.0729515552520752, |
|
"logps/rejected": -1.2014734745025635, |
|
"loss": 1.6349, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.1459031105041504, |
|
"rewards/margins": 0.2570436894893646, |
|
"rewards/rejected": -2.402946949005127, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7780979827089337, |
|
"grad_norm": 24.189590280718363, |
|
"learning_rate": 4.6235932669394676e-08, |
|
"logits/chosen": -2.0241146087646484, |
|
"logits/rejected": -2.024709463119507, |
|
"logps/chosen": -1.0855356454849243, |
|
"logps/rejected": -1.1969963312149048, |
|
"loss": 1.6497, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1710712909698486, |
|
"rewards/margins": 0.22292128205299377, |
|
"rewards/rejected": -2.3939926624298096, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7853025936599424, |
|
"grad_norm": 27.850960995358697, |
|
"learning_rate": 4.612458003901698e-08, |
|
"logits/chosen": -2.033730983734131, |
|
"logits/rejected": -2.0262105464935303, |
|
"logps/chosen": -1.107290506362915, |
|
"logps/rejected": -1.2112653255462646, |
|
"loss": 1.6632, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.21458101272583, |
|
"rewards/margins": 0.2079494446516037, |
|
"rewards/rejected": -2.4225306510925293, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.792507204610951, |
|
"grad_norm": 26.689600712739118, |
|
"learning_rate": 4.6011742437890476e-08, |
|
"logits/chosen": -2.02357816696167, |
|
"logits/rejected": -2.018139123916626, |
|
"logps/chosen": -1.0435128211975098, |
|
"logps/rejected": -1.179508090019226, |
|
"loss": 1.6066, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0870256423950195, |
|
"rewards/margins": 0.2719905972480774, |
|
"rewards/rejected": -2.359016180038452, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7997118155619597, |
|
"grad_norm": 18.98026656983148, |
|
"learning_rate": 4.589742779803259e-08, |
|
"logits/chosen": -2.019516944885254, |
|
"logits/rejected": -2.012485980987549, |
|
"logps/chosen": -1.0080353021621704, |
|
"logps/rejected": -1.1305128335952759, |
|
"loss": 1.6271, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.016070604324341, |
|
"rewards/margins": 0.24495509266853333, |
|
"rewards/rejected": -2.2610256671905518, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 21.893859326791098, |
|
"learning_rate": 4.5781644155290486e-08, |
|
"logits/chosen": -1.980934739112854, |
|
"logits/rejected": -1.973128318786621, |
|
"logps/chosen": -1.0460084676742554, |
|
"logps/rejected": -1.1076754331588745, |
|
"loss": 1.7173, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0920169353485107, |
|
"rewards/margins": 0.12333414703607559, |
|
"rewards/rejected": -2.215350866317749, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8141210374639769, |
|
"grad_norm": 20.441423721125243, |
|
"learning_rate": 4.566439964877613e-08, |
|
"logits/chosen": -2.0089352130889893, |
|
"logits/rejected": -2.004986047744751, |
|
"logps/chosen": -0.9979375004768372, |
|
"logps/rejected": -1.084967017173767, |
|
"loss": 1.6849, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9958750009536743, |
|
"rewards/margins": 0.17405889928340912, |
|
"rewards/rejected": -2.169934034347534, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8213256484149856, |
|
"grad_norm": 19.078058822922983, |
|
"learning_rate": 4.554570252029421e-08, |
|
"logits/chosen": -2.0512731075286865, |
|
"logits/rejected": -2.0500340461730957, |
|
"logps/chosen": -1.0467660427093506, |
|
"logps/rejected": -1.1654585599899292, |
|
"loss": 1.6319, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.093532085418701, |
|
"rewards/margins": 0.2373850792646408, |
|
"rewards/rejected": -2.3309171199798584, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8285302593659942, |
|
"grad_norm": 21.272524194475157, |
|
"learning_rate": 4.542556111376274e-08, |
|
"logits/chosen": -2.0449581146240234, |
|
"logits/rejected": -2.0386009216308594, |
|
"logps/chosen": -1.0740997791290283, |
|
"logps/rejected": -1.1670470237731934, |
|
"loss": 1.6769, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1481995582580566, |
|
"rewards/margins": 0.18589431047439575, |
|
"rewards/rejected": -2.3340940475463867, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8357348703170029, |
|
"grad_norm": 26.365851681600105, |
|
"learning_rate": 4.5303983874626506e-08, |
|
"logits/chosen": -1.994737982749939, |
|
"logits/rejected": -1.9932760000228882, |
|
"logps/chosen": -1.0379770994186401, |
|
"logps/rejected": -1.1171116828918457, |
|
"loss": 1.7065, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0759541988372803, |
|
"rewards/margins": 0.15826921164989471, |
|
"rewards/rejected": -2.2342233657836914, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8429394812680115, |
|
"grad_norm": 24.389025234614373, |
|
"learning_rate": 4.518097934926339e-08, |
|
"logits/chosen": -1.9977524280548096, |
|
"logits/rejected": -1.989159345626831, |
|
"logps/chosen": -1.0152474641799927, |
|
"logps/rejected": -1.1263688802719116, |
|
"loss": 1.6418, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0304949283599854, |
|
"rewards/margins": 0.22224298119544983, |
|
"rewards/rejected": -2.2527377605438232, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8501440922190202, |
|
"grad_norm": 26.253793649660842, |
|
"learning_rate": 4.505655618438363e-08, |
|
"logits/chosen": -1.9624111652374268, |
|
"logits/rejected": -1.9582713842391968, |
|
"logps/chosen": -1.0600744485855103, |
|
"logps/rejected": -1.1660957336425781, |
|
"loss": 1.6631, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1201488971710205, |
|
"rewards/margins": 0.2120426595211029, |
|
"rewards/rejected": -2.3321914672851562, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8573487031700289, |
|
"grad_norm": 20.23600321587351, |
|
"learning_rate": 4.4930723126421945e-08, |
|
"logits/chosen": -2.05165433883667, |
|
"logits/rejected": -2.044738531112671, |
|
"logps/chosen": -1.0710397958755493, |
|
"logps/rejected": -1.1480246782302856, |
|
"loss": 1.6949, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1420795917510986, |
|
"rewards/margins": 0.1539701223373413, |
|
"rewards/rejected": -2.2960493564605713, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 25.63037318376481, |
|
"learning_rate": 4.48034890209227e-08, |
|
"logits/chosen": -1.9842115640640259, |
|
"logits/rejected": -1.9720268249511719, |
|
"logps/chosen": -1.086510419845581, |
|
"logps/rejected": -1.17435622215271, |
|
"loss": 1.6721, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.173020839691162, |
|
"rewards/margins": 0.17569169402122498, |
|
"rewards/rejected": -2.34871244430542, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8717579250720461, |
|
"grad_norm": 22.794093788758836, |
|
"learning_rate": 4.4674862811918155e-08, |
|
"logits/chosen": -1.9662357568740845, |
|
"logits/rejected": -1.9744333028793335, |
|
"logps/chosen": -0.9373486638069153, |
|
"logps/rejected": -1.0918363332748413, |
|
"loss": 1.5843, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8746973276138306, |
|
"rewards/margins": 0.3089754581451416, |
|
"rewards/rejected": -2.1836726665496826, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8789625360230547, |
|
"grad_norm": 20.323545230432092, |
|
"learning_rate": 4.454485354129966e-08, |
|
"logits/chosen": -1.9998928308486938, |
|
"logits/rejected": -1.995469331741333, |
|
"logps/chosen": -1.0085976123809814, |
|
"logps/rejected": -1.1158851385116577, |
|
"loss": 1.6557, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.017195224761963, |
|
"rewards/margins": 0.21457497775554657, |
|
"rewards/rejected": -2.2317702770233154, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8861671469740634, |
|
"grad_norm": 19.995091234995602, |
|
"learning_rate": 4.4413470348182124e-08, |
|
"logits/chosen": -1.9716050624847412, |
|
"logits/rejected": -1.9596103429794312, |
|
"logps/chosen": -0.9838314056396484, |
|
"logps/rejected": -1.07748544216156, |
|
"loss": 1.6698, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9676628112792969, |
|
"rewards/margins": 0.1873079240322113, |
|
"rewards/rejected": -2.15497088432312, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8933717579250721, |
|
"grad_norm": 23.9057321705134, |
|
"learning_rate": 4.42807224682615e-08, |
|
"logits/chosen": -1.9828884601593018, |
|
"logits/rejected": -1.9808467626571655, |
|
"logps/chosen": -0.9355411529541016, |
|
"logps/rejected": -1.0736339092254639, |
|
"loss": 1.6071, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8710823059082031, |
|
"rewards/margins": 0.27618569135665894, |
|
"rewards/rejected": -2.1472678184509277, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9005763688760807, |
|
"grad_norm": 21.284396831133705, |
|
"learning_rate": 4.4146619233165604e-08, |
|
"logits/chosen": -2.023496627807617, |
|
"logits/rejected": -2.0257253646850586, |
|
"logps/chosen": -1.0640500783920288, |
|
"logps/rejected": -1.2194865942001343, |
|
"loss": 1.5927, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.1281001567840576, |
|
"rewards/margins": 0.3108729124069214, |
|
"rewards/rejected": -2.4389731884002686, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9077809798270894, |
|
"grad_norm": 29.022562118194454, |
|
"learning_rate": 4.4011170069798126e-08, |
|
"logits/chosen": -2.017789363861084, |
|
"logits/rejected": -2.0227370262145996, |
|
"logps/chosen": -1.1170909404754639, |
|
"logps/rejected": -1.2450226545333862, |
|
"loss": 1.623, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.2341818809509277, |
|
"rewards/margins": 0.255863755941391, |
|
"rewards/rejected": -2.4900453090667725, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9149855907780979, |
|
"grad_norm": 21.040680635282882, |
|
"learning_rate": 4.387438449967594e-08, |
|
"logits/chosen": -1.9829397201538086, |
|
"logits/rejected": -1.9764759540557861, |
|
"logps/chosen": -0.964381992816925, |
|
"logps/rejected": -1.0864336490631104, |
|
"loss": 1.6237, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.92876398563385, |
|
"rewards/margins": 0.24410350620746613, |
|
"rewards/rejected": -2.1728672981262207, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 24.550013299727784, |
|
"learning_rate": 4.373627213825983e-08, |
|
"logits/chosen": -2.0657618045806885, |
|
"logits/rejected": -2.061527967453003, |
|
"logps/chosen": -1.0255496501922607, |
|
"logps/rejected": -1.1638200283050537, |
|
"loss": 1.6084, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0510993003845215, |
|
"rewards/margins": 0.2765410244464874, |
|
"rewards/rejected": -2.3276400566101074, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9293948126801153, |
|
"grad_norm": 19.732937637348012, |
|
"learning_rate": 4.359684269427848e-08, |
|
"logits/chosen": -2.0326967239379883, |
|
"logits/rejected": -2.0317554473876953, |
|
"logps/chosen": -0.9937663078308105, |
|
"logps/rejected": -1.099719762802124, |
|
"loss": 1.648, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.987532615661621, |
|
"rewards/margins": 0.2119067907333374, |
|
"rewards/rejected": -2.199439525604248, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9365994236311239, |
|
"grad_norm": 25.631669490766754, |
|
"learning_rate": 4.34561059690461e-08, |
|
"logits/chosen": -2.077247381210327, |
|
"logits/rejected": -2.079225540161133, |
|
"logps/chosen": -1.0479168891906738, |
|
"logps/rejected": -1.1121768951416016, |
|
"loss": 1.7184, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0958337783813477, |
|
"rewards/margins": 0.12852007150650024, |
|
"rewards/rejected": -2.224353790283203, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9438040345821326, |
|
"grad_norm": 24.61195364772657, |
|
"learning_rate": 4.3314071855773314e-08, |
|
"logits/chosen": -2.0383481979370117, |
|
"logits/rejected": -2.0388643741607666, |
|
"logps/chosen": -0.9827170372009277, |
|
"logps/rejected": -1.0801887512207031, |
|
"loss": 1.6607, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9654340744018555, |
|
"rewards/margins": 0.1949436366558075, |
|
"rewards/rejected": -2.1603775024414062, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9510086455331412, |
|
"grad_norm": 24.093033036544337, |
|
"learning_rate": 4.3170750338871806e-08, |
|
"logits/chosen": -2.0104308128356934, |
|
"logits/rejected": -2.0040740966796875, |
|
"logps/chosen": -1.0747355222702026, |
|
"logps/rejected": -1.220551609992981, |
|
"loss": 1.5931, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1494710445404053, |
|
"rewards/margins": 0.2916319668292999, |
|
"rewards/rejected": -2.441103219985962, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9582132564841499, |
|
"grad_norm": 17.676250888332724, |
|
"learning_rate": 4.3026151493252414e-08, |
|
"logits/chosen": -2.0373542308807373, |
|
"logits/rejected": -2.032808542251587, |
|
"logps/chosen": -1.0594923496246338, |
|
"logps/rejected": -1.183484673500061, |
|
"loss": 1.6306, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1189846992492676, |
|
"rewards/margins": 0.24798473715782166, |
|
"rewards/rejected": -2.366969347000122, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9654178674351584, |
|
"grad_norm": 29.319097572050133, |
|
"learning_rate": 4.2880285483616895e-08, |
|
"logits/chosen": -2.004913806915283, |
|
"logits/rejected": -2.005673408508301, |
|
"logps/chosen": -1.015624761581421, |
|
"logps/rejected": -1.133320689201355, |
|
"loss": 1.641, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.031249523162842, |
|
"rewards/margins": 0.23539182543754578, |
|
"rewards/rejected": -2.26664137840271, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9726224783861671, |
|
"grad_norm": 18.366477723606742, |
|
"learning_rate": 4.273316256374342e-08, |
|
"logits/chosen": -1.933974027633667, |
|
"logits/rejected": -1.9319394826889038, |
|
"logps/chosen": -1.0121793746948242, |
|
"logps/rejected": -1.087815523147583, |
|
"loss": 1.705, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0243587493896484, |
|
"rewards/margins": 0.15127214789390564, |
|
"rewards/rejected": -2.175631046295166, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 19.021483566176542, |
|
"learning_rate": 4.258479307576576e-08, |
|
"logits/chosen": -1.9841632843017578, |
|
"logits/rejected": -1.9821224212646484, |
|
"logps/chosen": -0.9617762565612793, |
|
"logps/rejected": -1.0556433200836182, |
|
"loss": 1.6759, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9235525131225586, |
|
"rewards/margins": 0.18773424625396729, |
|
"rewards/rejected": -2.1112866401672363, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9870317002881844, |
|
"grad_norm": 25.022118857417556, |
|
"learning_rate": 4.243518744944626e-08, |
|
"logits/chosen": -2.0071628093719482, |
|
"logits/rejected": -2.002535343170166, |
|
"logps/chosen": -0.9993633031845093, |
|
"logps/rejected": -1.123016595840454, |
|
"loss": 1.6203, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9987266063690186, |
|
"rewards/margins": 0.24730615317821503, |
|
"rewards/rejected": -2.246033191680908, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9942363112391931, |
|
"grad_norm": 24.091606187020563, |
|
"learning_rate": 4.22843562014427e-08, |
|
"logits/chosen": -1.973891019821167, |
|
"logits/rejected": -1.9703378677368164, |
|
"logps/chosen": -1.0496742725372314, |
|
"logps/rejected": -1.1253955364227295, |
|
"loss": 1.6931, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.099348545074463, |
|
"rewards/margins": 0.1514427214860916, |
|
"rewards/rejected": -2.250791072845459, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0014409221902016, |
|
"grad_norm": 32.60072445084287, |
|
"learning_rate": 4.2132309934569e-08, |
|
"logits/chosen": -2.0483193397521973, |
|
"logits/rejected": -2.048795223236084, |
|
"logps/chosen": -1.014096975326538, |
|
"logps/rejected": -1.129988670349121, |
|
"loss": 1.6428, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.028193950653076, |
|
"rewards/margins": 0.23178336024284363, |
|
"rewards/rejected": -2.259977340698242, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0086455331412103, |
|
"grad_norm": 22.12849545877557, |
|
"learning_rate": 4.197905933704989e-08, |
|
"logits/chosen": -1.9475075006484985, |
|
"logits/rejected": -1.94496750831604, |
|
"logps/chosen": -1.0586014986038208, |
|
"logps/rejected": -1.1966216564178467, |
|
"loss": 1.6256, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1172029972076416, |
|
"rewards/margins": 0.2760399878025055, |
|
"rewards/rejected": -2.3932433128356934, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.015850144092219, |
|
"grad_norm": 26.196327009154857, |
|
"learning_rate": 4.1824615181769577e-08, |
|
"logits/chosen": -1.9907910823822021, |
|
"logits/rejected": -1.9951509237289429, |
|
"logps/chosen": -1.0109912157058716, |
|
"logps/rejected": -1.139633297920227, |
|
"loss": 1.6317, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.021982431411743, |
|
"rewards/margins": 0.25728410482406616, |
|
"rewards/rejected": -2.279266595840454, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0230547550432276, |
|
"grad_norm": 21.70997161453033, |
|
"learning_rate": 4.1668988325514434e-08, |
|
"logits/chosen": -2.015436887741089, |
|
"logits/rejected": -2.010326385498047, |
|
"logps/chosen": -1.1151282787322998, |
|
"logps/rejected": -1.2344862222671509, |
|
"loss": 1.6553, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.2302565574645996, |
|
"rewards/margins": 0.23871548473834991, |
|
"rewards/rejected": -2.4689724445343018, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0302593659942363, |
|
"grad_norm": 24.489368436482035, |
|
"learning_rate": 4.1512189708209844e-08, |
|
"logits/chosen": -2.0582668781280518, |
|
"logits/rejected": -2.0570740699768066, |
|
"logps/chosen": -0.9387423396110535, |
|
"logps/rejected": -1.0281721353530884, |
|
"loss": 1.6834, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.877484679222107, |
|
"rewards/margins": 0.17885959148406982, |
|
"rewards/rejected": -2.0563442707061768, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 26.799568343362726, |
|
"learning_rate": 4.1354230352151143e-08, |
|
"logits/chosen": -2.0068116188049316, |
|
"logits/rejected": -2.000192165374756, |
|
"logps/chosen": -1.1369554996490479, |
|
"logps/rejected": -1.2212724685668945, |
|
"loss": 1.696, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.2739109992980957, |
|
"rewards/margins": 0.16863420605659485, |
|
"rewards/rejected": -2.442544937133789, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0446685878962536, |
|
"grad_norm": 19.49558850871138, |
|
"learning_rate": 4.119512136122882e-08, |
|
"logits/chosen": -2.069859266281128, |
|
"logits/rejected": -2.0790157318115234, |
|
"logps/chosen": -0.993932843208313, |
|
"logps/rejected": -1.1478220224380493, |
|
"loss": 1.5919, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.987865686416626, |
|
"rewards/margins": 0.30777832865715027, |
|
"rewards/rejected": -2.2956440448760986, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0518731988472623, |
|
"grad_norm": 19.10668753445954, |
|
"learning_rate": 4.103487392014795e-08, |
|
"logits/chosen": -1.9885203838348389, |
|
"logits/rejected": -1.976251244544983, |
|
"logps/chosen": -0.999312698841095, |
|
"logps/rejected": -1.162035584449768, |
|
"loss": 1.5665, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.99862539768219, |
|
"rewards/margins": 0.32544589042663574, |
|
"rewards/rejected": -2.324071168899536, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.059077809798271, |
|
"grad_norm": 19.370074530775952, |
|
"learning_rate": 4.087349929364192e-08, |
|
"logits/chosen": -2.025360584259033, |
|
"logits/rejected": -2.015761613845825, |
|
"logps/chosen": -0.9582245945930481, |
|
"logps/rejected": -1.0940570831298828, |
|
"loss": 1.6113, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9164491891860962, |
|
"rewards/margins": 0.2716650068759918, |
|
"rewards/rejected": -2.1881141662597656, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0662824207492796, |
|
"grad_norm": 20.71760664678835, |
|
"learning_rate": 4.0711008825680645e-08, |
|
"logits/chosen": -1.9764623641967773, |
|
"logits/rejected": -1.975449562072754, |
|
"logps/chosen": -1.0053189992904663, |
|
"logps/rejected": -1.125705361366272, |
|
"loss": 1.6409, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0106379985809326, |
|
"rewards/margins": 0.24077251553535461, |
|
"rewards/rejected": -2.251410722732544, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0734870317002883, |
|
"grad_norm": 22.88411536785193, |
|
"learning_rate": 4.054741393867306e-08, |
|
"logits/chosen": -1.9940725564956665, |
|
"logits/rejected": -1.991307020187378, |
|
"logps/chosen": -1.110062837600708, |
|
"logps/rejected": -1.1640889644622803, |
|
"loss": 1.7326, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.220125675201416, |
|
"rewards/margins": 0.10805213451385498, |
|
"rewards/rejected": -2.3281779289245605, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.080691642651297, |
|
"grad_norm": 21.542529455963674, |
|
"learning_rate": 4.038272613266419e-08, |
|
"logits/chosen": -1.995582938194275, |
|
"logits/rejected": -1.9824492931365967, |
|
"logps/chosen": -1.0086337327957153, |
|
"logps/rejected": -1.1226236820220947, |
|
"loss": 1.6355, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0172674655914307, |
|
"rewards/margins": 0.22797977924346924, |
|
"rewards/rejected": -2.2452473640441895, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0878962536023056, |
|
"grad_norm": 20.350925893402135, |
|
"learning_rate": 4.0216956984526784e-08, |
|
"logits/chosen": -2.0434131622314453, |
|
"logits/rejected": -2.0453896522521973, |
|
"logps/chosen": -1.0144180059432983, |
|
"logps/rejected": -1.126713514328003, |
|
"loss": 1.6479, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0288360118865967, |
|
"rewards/margins": 0.22459068894386292, |
|
"rewards/rejected": -2.253427028656006, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 18.958922435681895, |
|
"learning_rate": 4.0050118147147446e-08, |
|
"logits/chosen": -1.9865297079086304, |
|
"logits/rejected": -1.9869588613510132, |
|
"logps/chosen": -1.0981040000915527, |
|
"logps/rejected": -1.1105927228927612, |
|
"loss": 1.7979, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -2.1962080001831055, |
|
"rewards/margins": 0.024977359920740128, |
|
"rewards/rejected": -2.2211854457855225, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1023054755043227, |
|
"grad_norm": 20.44736409962546, |
|
"learning_rate": 3.988222134860755e-08, |
|
"logits/chosen": -2.0269482135772705, |
|
"logits/rejected": -2.018134117126465, |
|
"logps/chosen": -0.9491475224494934, |
|
"logps/rejected": -1.1186093091964722, |
|
"loss": 1.5605, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.8982950448989868, |
|
"rewards/margins": 0.33892351388931274, |
|
"rewards/rejected": -2.2372186183929443, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.1095100864553313, |
|
"grad_norm": 28.227918425245296, |
|
"learning_rate": 3.9713278391358724e-08, |
|
"logits/chosen": -2.0332820415496826, |
|
"logits/rejected": -2.027156352996826, |
|
"logps/chosen": -1.0234980583190918, |
|
"logps/rejected": -1.149630069732666, |
|
"loss": 1.6176, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0469961166381836, |
|
"rewards/margins": 0.2522641718387604, |
|
"rewards/rejected": -2.299260139465332, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.11671469740634, |
|
"grad_norm": 21.599435876957084, |
|
"learning_rate": 3.954330115139328e-08, |
|
"logits/chosen": -2.012049436569214, |
|
"logits/rejected": -2.006786823272705, |
|
"logps/chosen": -1.026759386062622, |
|
"logps/rejected": -1.135288119316101, |
|
"loss": 1.6551, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.053518772125244, |
|
"rewards/margins": 0.2170577496290207, |
|
"rewards/rejected": -2.270576238632202, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1239193083573487, |
|
"grad_norm": 30.010489670091495, |
|
"learning_rate": 3.937230157740931e-08, |
|
"logits/chosen": -2.06923246383667, |
|
"logits/rejected": -2.063246488571167, |
|
"logps/chosen": -1.0465221405029297, |
|
"logps/rejected": -1.1864023208618164, |
|
"loss": 1.6074, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0930442810058594, |
|
"rewards/margins": 0.279760479927063, |
|
"rewards/rejected": -2.372804641723633, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1311239193083573, |
|
"grad_norm": 19.2378025361386, |
|
"learning_rate": 3.920029168997077e-08, |
|
"logits/chosen": -2.047394037246704, |
|
"logits/rejected": -2.045605182647705, |
|
"logps/chosen": -1.0021626949310303, |
|
"logps/rejected": -1.1329083442687988, |
|
"loss": 1.6155, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0043253898620605, |
|
"rewards/margins": 0.26149123907089233, |
|
"rewards/rejected": -2.2658166885375977, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.138328530259366, |
|
"grad_norm": 35.52052151454362, |
|
"learning_rate": 3.9027283580662476e-08, |
|
"logits/chosen": -2.015282392501831, |
|
"logits/rejected": -2.00935697555542, |
|
"logps/chosen": -1.046675205230713, |
|
"logps/rejected": -1.1979637145996094, |
|
"loss": 1.5981, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.093350410461426, |
|
"rewards/margins": 0.3025774657726288, |
|
"rewards/rejected": -2.3959274291992188, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.1455331412103746, |
|
"grad_norm": 19.868592765575958, |
|
"learning_rate": 3.885328941124014e-08, |
|
"logits/chosen": -1.9911415576934814, |
|
"logits/rejected": -1.9865388870239258, |
|
"logps/chosen": -0.9653146862983704, |
|
"logps/rejected": -1.1030223369598389, |
|
"loss": 1.5978, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9306293725967407, |
|
"rewards/margins": 0.27541524171829224, |
|
"rewards/rejected": -2.2060446739196777, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 24.233182948016438, |
|
"learning_rate": 3.867832141277539e-08, |
|
"logits/chosen": -2.0305354595184326, |
|
"logits/rejected": -2.0217463970184326, |
|
"logps/chosen": -1.0669244527816772, |
|
"logps/rejected": -1.1823561191558838, |
|
"loss": 1.6418, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1338489055633545, |
|
"rewards/margins": 0.23086321353912354, |
|
"rewards/rejected": -2.3647122383117676, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.159942363112392, |
|
"grad_norm": 24.72356012381405, |
|
"learning_rate": 3.850239188479606e-08, |
|
"logits/chosen": -1.9773706197738647, |
|
"logits/rejected": -1.9808155298233032, |
|
"logps/chosen": -1.0085049867630005, |
|
"logps/rejected": -1.1022659540176392, |
|
"loss": 1.6744, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.017009973526001, |
|
"rewards/margins": 0.18752184510231018, |
|
"rewards/rejected": -2.2045319080352783, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1671469740634006, |
|
"grad_norm": 24.775721760516777, |
|
"learning_rate": 3.832551319442151e-08, |
|
"logits/chosen": -2.0583930015563965, |
|
"logits/rejected": -2.0596446990966797, |
|
"logps/chosen": -1.0561758279800415, |
|
"logps/rejected": -1.186408281326294, |
|
"loss": 1.6182, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.112351655960083, |
|
"rewards/margins": 0.2604648470878601, |
|
"rewards/rejected": -2.372816562652588, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1743515850144093, |
|
"grad_norm": 20.23661779110279, |
|
"learning_rate": 3.81476977754933e-08, |
|
"logits/chosen": -1.951172113418579, |
|
"logits/rejected": -1.9476385116577148, |
|
"logps/chosen": -1.0267951488494873, |
|
"logps/rejected": -1.1004960536956787, |
|
"loss": 1.7001, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0535902976989746, |
|
"rewards/margins": 0.14740177989006042, |
|
"rewards/rejected": -2.2009921073913574, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.181556195965418, |
|
"grad_norm": 19.554149170089566, |
|
"learning_rate": 3.796895812770114e-08, |
|
"logits/chosen": -1.9799621105194092, |
|
"logits/rejected": -1.9810926914215088, |
|
"logps/chosen": -1.0158445835113525, |
|
"logps/rejected": -1.111926794052124, |
|
"loss": 1.6759, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.031689167022705, |
|
"rewards/margins": 0.1921643614768982, |
|
"rewards/rejected": -2.223853588104248, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1887608069164266, |
|
"grad_norm": 25.592129333950247, |
|
"learning_rate": 3.7789306815704216e-08, |
|
"logits/chosen": -2.003976345062256, |
|
"logits/rejected": -2.0018844604492188, |
|
"logps/chosen": -1.0064101219177246, |
|
"logps/rejected": -1.08089280128479, |
|
"loss": 1.7033, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.012820243835449, |
|
"rewards/margins": 0.14896517992019653, |
|
"rewards/rejected": -2.16178560256958, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.195965417867435, |
|
"grad_norm": 21.28393320695919, |
|
"learning_rate": 3.760875646824795e-08, |
|
"logits/chosen": -1.9339759349822998, |
|
"logits/rejected": -1.937636375427246, |
|
"logps/chosen": -0.9738245010375977, |
|
"logps/rejected": -1.0825483798980713, |
|
"loss": 1.6558, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9476490020751953, |
|
"rewards/margins": 0.21744783222675323, |
|
"rewards/rejected": -2.1650967597961426, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2031700288184437, |
|
"grad_norm": 26.447583069644832, |
|
"learning_rate": 3.742731977727623e-08, |
|
"logits/chosen": -2.0301713943481445, |
|
"logits/rejected": -2.0270252227783203, |
|
"logps/chosen": -1.0391323566436768, |
|
"logps/rejected": -1.179983377456665, |
|
"loss": 1.6045, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0782647132873535, |
|
"rewards/margins": 0.2817017734050751, |
|
"rewards/rejected": -2.35996675491333, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 23.740367749765426, |
|
"learning_rate": 3.7245009497039244e-08, |
|
"logits/chosen": -1.9730199575424194, |
|
"logits/rejected": -1.9651565551757812, |
|
"logps/chosen": -1.011291265487671, |
|
"logps/rejected": -1.1528630256652832, |
|
"loss": 1.5979, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.022582530975342, |
|
"rewards/margins": 0.28314343094825745, |
|
"rewards/rejected": -2.3057260513305664, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.217579250720461, |
|
"grad_norm": 21.613562706441108, |
|
"learning_rate": 3.7061838443196886e-08, |
|
"logits/chosen": -2.0166778564453125, |
|
"logits/rejected": -2.018479585647583, |
|
"logps/chosen": -1.0249133110046387, |
|
"logps/rejected": -1.1527760028839111, |
|
"loss": 1.6175, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0498266220092773, |
|
"rewards/margins": 0.25572583079338074, |
|
"rewards/rejected": -2.3055520057678223, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2247838616714697, |
|
"grad_norm": 26.59797525557739, |
|
"learning_rate": 3.68778194919179e-08, |
|
"logits/chosen": -1.98636794090271, |
|
"logits/rejected": -1.9875138998031616, |
|
"logps/chosen": -1.076690673828125, |
|
"logps/rejected": -1.204590916633606, |
|
"loss": 1.6207, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.15338134765625, |
|
"rewards/margins": 0.25580060482025146, |
|
"rewards/rejected": -2.409181833267212, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2319884726224783, |
|
"grad_norm": 23.52633825308599, |
|
"learning_rate": 3.66929655789747e-08, |
|
"logits/chosen": -2.0286943912506104, |
|
"logits/rejected": -2.0175669193267822, |
|
"logps/chosen": -0.9385663866996765, |
|
"logps/rejected": -1.0960757732391357, |
|
"loss": 1.5837, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.877132773399353, |
|
"rewards/margins": 0.31501883268356323, |
|
"rewards/rejected": -2.1921515464782715, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.239193083573487, |
|
"grad_norm": 19.78110352441659, |
|
"learning_rate": 3.6507289698834064e-08, |
|
"logits/chosen": -1.976352334022522, |
|
"logits/rejected": -1.9729232788085938, |
|
"logps/chosen": -0.984195351600647, |
|
"logps/rejected": -1.120203971862793, |
|
"loss": 1.6215, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.968390703201294, |
|
"rewards/margins": 0.27201730012893677, |
|
"rewards/rejected": -2.240407943725586, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2463976945244957, |
|
"grad_norm": 29.100393569917596, |
|
"learning_rate": 3.6320804903743684e-08, |
|
"logits/chosen": -2.019465923309326, |
|
"logits/rejected": -2.019134283065796, |
|
"logps/chosen": -1.035032033920288, |
|
"logps/rejected": -1.1646802425384521, |
|
"loss": 1.6245, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.070064067840576, |
|
"rewards/margins": 0.25929638743400574, |
|
"rewards/rejected": -2.3293604850769043, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.2536023054755043, |
|
"grad_norm": 19.649258256134964, |
|
"learning_rate": 3.61335243028146e-08, |
|
"logits/chosen": -2.007497787475586, |
|
"logits/rejected": -2.012320041656494, |
|
"logps/chosen": -1.0912740230560303, |
|
"logps/rejected": -1.2261770963668823, |
|
"loss": 1.6174, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1825480461120605, |
|
"rewards/margins": 0.26980629563331604, |
|
"rewards/rejected": -2.4523541927337646, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.260806916426513, |
|
"grad_norm": 21.835797474283304, |
|
"learning_rate": 3.5945461061099736e-08, |
|
"logits/chosen": -1.9699938297271729, |
|
"logits/rejected": -1.9564844369888306, |
|
"logps/chosen": -1.0432616472244263, |
|
"logps/rejected": -1.1263704299926758, |
|
"loss": 1.7047, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0865232944488525, |
|
"rewards/margins": 0.1662176102399826, |
|
"rewards/rejected": -2.2527408599853516, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 23.15042193876464, |
|
"learning_rate": 3.5756628398668446e-08, |
|
"logits/chosen": -2.0515687465667725, |
|
"logits/rejected": -2.056640148162842, |
|
"logps/chosen": -1.1328608989715576, |
|
"logps/rejected": -1.2376948595046997, |
|
"loss": 1.672, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.2657217979431152, |
|
"rewards/margins": 0.20966771245002747, |
|
"rewards/rejected": -2.4753897190093994, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2752161383285303, |
|
"grad_norm": 21.73032924866429, |
|
"learning_rate": 3.556703958967716e-08, |
|
"logits/chosen": -2.0368218421936035, |
|
"logits/rejected": -2.0322813987731934, |
|
"logps/chosen": -1.0513852834701538, |
|
"logps/rejected": -1.188612699508667, |
|
"loss": 1.6166, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1027705669403076, |
|
"rewards/margins": 0.27445459365844727, |
|
"rewards/rejected": -2.377225399017334, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.282420749279539, |
|
"grad_norm": 27.991096896336643, |
|
"learning_rate": 3.5376707961436297e-08, |
|
"logits/chosen": -2.0231380462646484, |
|
"logits/rejected": -2.0172154903411865, |
|
"logps/chosen": -1.1404026746749878, |
|
"logps/rejected": -1.2050397396087646, |
|
"loss": 1.716, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.2808053493499756, |
|
"rewards/margins": 0.12927410006523132, |
|
"rewards/rejected": -2.4100794792175293, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2896253602305476, |
|
"grad_norm": 15.526485228812097, |
|
"learning_rate": 3.51856468934734e-08, |
|
"logits/chosen": -1.9784328937530518, |
|
"logits/rejected": -1.9799473285675049, |
|
"logps/chosen": -0.9755992889404297, |
|
"logps/rejected": -1.0713945627212524, |
|
"loss": 1.6629, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9511985778808594, |
|
"rewards/margins": 0.19159065186977386, |
|
"rewards/rejected": -2.142789125442505, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2968299711815563, |
|
"grad_norm": 23.420786518726864, |
|
"learning_rate": 3.499386981659262e-08, |
|
"logits/chosen": -2.0592575073242188, |
|
"logits/rejected": -2.0537567138671875, |
|
"logps/chosen": -1.0179462432861328, |
|
"logps/rejected": -1.2126991748809814, |
|
"loss": 1.5379, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0358924865722656, |
|
"rewards/margins": 0.38950610160827637, |
|
"rewards/rejected": -2.425398349761963, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.304034582132565, |
|
"grad_norm": 25.260344669251943, |
|
"learning_rate": 3.480139021193057e-08, |
|
"logits/chosen": -1.9808191061019897, |
|
"logits/rejected": -1.9826618432998657, |
|
"logps/chosen": -0.9968624114990234, |
|
"logps/rejected": -1.1215671300888062, |
|
"loss": 1.6386, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9937248229980469, |
|
"rewards/margins": 0.2494095265865326, |
|
"rewards/rejected": -2.2431342601776123, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3112391930835736, |
|
"grad_norm": 31.402984893149874, |
|
"learning_rate": 3.4608221610008666e-08, |
|
"logits/chosen": -2.014035224914551, |
|
"logits/rejected": -2.009444236755371, |
|
"logps/chosen": -0.9725703001022339, |
|
"logps/rejected": -1.1262907981872559, |
|
"loss": 1.59, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9451406002044678, |
|
"rewards/margins": 0.30744099617004395, |
|
"rewards/rejected": -2.2525815963745117, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.318443804034582, |
|
"grad_norm": 18.601121296835238, |
|
"learning_rate": 3.4414377589782e-08, |
|
"logits/chosen": -1.986240029335022, |
|
"logits/rejected": -1.995163917541504, |
|
"logps/chosen": -1.018169641494751, |
|
"logps/rejected": -1.155580997467041, |
|
"loss": 1.6207, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.036339282989502, |
|
"rewards/margins": 0.27482283115386963, |
|
"rewards/rejected": -2.311161994934082, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 21.14665341971537, |
|
"learning_rate": 3.4219871777684745e-08, |
|
"logits/chosen": -1.9896999597549438, |
|
"logits/rejected": -1.9774585962295532, |
|
"logps/chosen": -0.9940080642700195, |
|
"logps/rejected": -1.1199101209640503, |
|
"loss": 1.6367, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.988016128540039, |
|
"rewards/margins": 0.2518041729927063, |
|
"rewards/rejected": -2.2398202419281006, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.3328530259365994, |
|
"grad_norm": 21.10945671522239, |
|
"learning_rate": 3.4024717846672364e-08, |
|
"logits/chosen": -2.0322022438049316, |
|
"logits/rejected": -2.0256874561309814, |
|
"logps/chosen": -0.9947555661201477, |
|
"logps/rejected": -1.1280686855316162, |
|
"loss": 1.6224, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9895111322402954, |
|
"rewards/margins": 0.26662617921829224, |
|
"rewards/rejected": -2.2561373710632324, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.340057636887608, |
|
"grad_norm": 20.96016297776703, |
|
"learning_rate": 3.382892951526036e-08, |
|
"logits/chosen": -2.0151829719543457, |
|
"logits/rejected": -2.0123233795166016, |
|
"logps/chosen": -1.0515539646148682, |
|
"logps/rejected": -1.2059379816055298, |
|
"loss": 1.5835, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1031079292297363, |
|
"rewards/margins": 0.3087681829929352, |
|
"rewards/rejected": -2.4118759632110596, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.3472622478386167, |
|
"grad_norm": 24.446492024491263, |
|
"learning_rate": 3.3632520546559974e-08, |
|
"logits/chosen": -1.9829241037368774, |
|
"logits/rejected": -1.9711620807647705, |
|
"logps/chosen": -0.9253702163696289, |
|
"logps/rejected": -1.1010020971298218, |
|
"loss": 1.5437, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8507404327392578, |
|
"rewards/margins": 0.35126370191574097, |
|
"rewards/rejected": -2.2020041942596436, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.3544668587896254, |
|
"grad_norm": 22.257431028743113, |
|
"learning_rate": 3.34355047473107e-08, |
|
"logits/chosen": -1.9958999156951904, |
|
"logits/rejected": -1.991745948791504, |
|
"logps/chosen": -1.0290558338165283, |
|
"logps/rejected": -1.1228206157684326, |
|
"loss": 1.6814, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0581116676330566, |
|
"rewards/margins": 0.18752947449684143, |
|
"rewards/rejected": -2.2456412315368652, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.361671469740634, |
|
"grad_norm": 26.52808003756447, |
|
"learning_rate": 3.323789596690971e-08, |
|
"logits/chosen": -1.9655685424804688, |
|
"logits/rejected": -1.966602087020874, |
|
"logps/chosen": -1.0216814279556274, |
|
"logps/rejected": -1.1607329845428467, |
|
"loss": 1.6043, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.043362855911255, |
|
"rewards/margins": 0.27810320258140564, |
|
"rewards/rejected": -2.3214659690856934, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3688760806916427, |
|
"grad_norm": 18.35881071746021, |
|
"learning_rate": 3.303970809643828e-08, |
|
"logits/chosen": -1.9993665218353271, |
|
"logits/rejected": -2.0040156841278076, |
|
"logps/chosen": -1.0356684923171997, |
|
"logps/rejected": -1.169182300567627, |
|
"loss": 1.6201, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0713369846343994, |
|
"rewards/margins": 0.26702752709388733, |
|
"rewards/rejected": -2.338364601135254, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3760806916426513, |
|
"grad_norm": 24.62927635906183, |
|
"learning_rate": 3.2840955067685356e-08, |
|
"logits/chosen": -2.027216672897339, |
|
"logits/rejected": -2.0315163135528564, |
|
"logps/chosen": -1.0546417236328125, |
|
"logps/rejected": -1.2095133066177368, |
|
"loss": 1.5822, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.109283447265625, |
|
"rewards/margins": 0.30974280834198, |
|
"rewards/rejected": -2.4190266132354736, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 20.42510454332798, |
|
"learning_rate": 3.264165085216817e-08, |
|
"logits/chosen": -2.034405469894409, |
|
"logits/rejected": -2.034414768218994, |
|
"logps/chosen": -0.9350569844245911, |
|
"logps/rejected": -1.109073281288147, |
|
"loss": 1.5563, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8701139688491821, |
|
"rewards/margins": 0.34803250432014465, |
|
"rewards/rejected": -2.218146562576294, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3904899135446687, |
|
"grad_norm": 21.638674305028665, |
|
"learning_rate": 3.244180946015008e-08, |
|
"logits/chosen": -1.9656782150268555, |
|
"logits/rejected": -1.9663848876953125, |
|
"logps/chosen": -1.034440517425537, |
|
"logps/rejected": -1.102476716041565, |
|
"loss": 1.714, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.068881034851074, |
|
"rewards/margins": 0.1360722780227661, |
|
"rewards/rejected": -2.20495343208313, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.397694524495677, |
|
"grad_norm": 18.917305957392887, |
|
"learning_rate": 3.224144493965578e-08, |
|
"logits/chosen": -2.0499091148376465, |
|
"logits/rejected": -2.053539276123047, |
|
"logps/chosen": -0.9922255277633667, |
|
"logps/rejected": -1.1013872623443604, |
|
"loss": 1.6507, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9844510555267334, |
|
"rewards/margins": 0.21832342445850372, |
|
"rewards/rejected": -2.2027745246887207, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.4048991354466858, |
|
"grad_norm": 20.55745064569853, |
|
"learning_rate": 3.204057137548371e-08, |
|
"logits/chosen": -2.0113415718078613, |
|
"logits/rejected": -2.005919933319092, |
|
"logps/chosen": -0.9785380363464355, |
|
"logps/rejected": -1.0900627374649048, |
|
"loss": 1.6464, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.957076072692871, |
|
"rewards/margins": 0.22304920852184296, |
|
"rewards/rejected": -2.1801254749298096, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4121037463976944, |
|
"grad_norm": 22.56028475204301, |
|
"learning_rate": 3.183920288821597e-08, |
|
"logits/chosen": -1.9948104619979858, |
|
"logits/rejected": -1.9915351867675781, |
|
"logps/chosen": -1.0019850730895996, |
|
"logps/rejected": -1.1697156429290771, |
|
"loss": 1.5662, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.003970146179199, |
|
"rewards/margins": 0.3354611098766327, |
|
"rewards/rejected": -2.3394312858581543, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.419308357348703, |
|
"grad_norm": 27.42995413253607, |
|
"learning_rate": 3.1637353633225735e-08, |
|
"logits/chosen": -2.037505626678467, |
|
"logits/rejected": -2.031297206878662, |
|
"logps/chosen": -1.0297725200653076, |
|
"logps/rejected": -1.1842756271362305, |
|
"loss": 1.5879, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0595450401306152, |
|
"rewards/margins": 0.3090066909790039, |
|
"rewards/rejected": -2.368551254272461, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4265129682997117, |
|
"grad_norm": 23.08165784353334, |
|
"learning_rate": 3.143503779968213e-08, |
|
"logits/chosen": -2.0119919776916504, |
|
"logits/rejected": -2.0123164653778076, |
|
"logps/chosen": -1.018511176109314, |
|
"logps/rejected": -1.1586663722991943, |
|
"loss": 1.623, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.037022352218628, |
|
"rewards/margins": 0.2803104817867279, |
|
"rewards/rejected": -2.3173327445983887, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4337175792507204, |
|
"grad_norm": 20.570737291684164, |
|
"learning_rate": 3.1232269609552875e-08, |
|
"logits/chosen": -1.9909594058990479, |
|
"logits/rejected": -1.9883991479873657, |
|
"logps/chosen": -0.9996950030326843, |
|
"logps/rejected": -1.126483678817749, |
|
"loss": 1.6305, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9993900060653687, |
|
"rewards/margins": 0.25357744097709656, |
|
"rewards/rejected": -2.252967357635498, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 18.876341063543773, |
|
"learning_rate": 3.102906331660444e-08, |
|
"logits/chosen": -2.051898956298828, |
|
"logits/rejected": -2.043545722961426, |
|
"logps/chosen": -0.9927466511726379, |
|
"logps/rejected": -1.1729360818862915, |
|
"loss": 1.5476, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9854933023452759, |
|
"rewards/margins": 0.36037883162498474, |
|
"rewards/rejected": -2.345872163772583, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4481268011527377, |
|
"grad_norm": 18.989404751105003, |
|
"learning_rate": 3.082543320540015e-08, |
|
"logits/chosen": -1.9971063137054443, |
|
"logits/rejected": -1.9903011322021484, |
|
"logps/chosen": -1.0080406665802002, |
|
"logps/rejected": -1.1570656299591064, |
|
"loss": 1.5924, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0160813331604004, |
|
"rewards/margins": 0.2980501055717468, |
|
"rewards/rejected": -2.314131259918213, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4553314121037464, |
|
"grad_norm": 21.76127346796002, |
|
"learning_rate": 3.062139359029599e-08, |
|
"logits/chosen": -2.0260488986968994, |
|
"logits/rejected": -2.0259416103363037, |
|
"logps/chosen": -1.0300636291503906, |
|
"logps/rejected": -1.1194459199905396, |
|
"loss": 1.6832, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.0601272583007812, |
|
"rewards/margins": 0.1787644922733307, |
|
"rewards/rejected": -2.238891839981079, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.462536023054755, |
|
"grad_norm": 22.315708105619215, |
|
"learning_rate": 3.041695881443437e-08, |
|
"logits/chosen": -2.0457310676574707, |
|
"logits/rejected": -2.041228771209717, |
|
"logps/chosen": -0.9756488800048828, |
|
"logps/rejected": -1.1131236553192139, |
|
"loss": 1.6062, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.9512977600097656, |
|
"rewards/margins": 0.2749495506286621, |
|
"rewards/rejected": -2.2262473106384277, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4697406340057637, |
|
"grad_norm": 26.741622993897, |
|
"learning_rate": 3.0212143248735886e-08, |
|
"logits/chosen": -2.0314245223999023, |
|
"logits/rejected": -2.0318028926849365, |
|
"logps/chosen": -1.000044345855713, |
|
"logps/rejected": -1.1435314416885376, |
|
"loss": 1.6005, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.000088691711426, |
|
"rewards/margins": 0.28697413206100464, |
|
"rewards/rejected": -2.287062883377075, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4769452449567724, |
|
"grad_norm": 23.377353479890957, |
|
"learning_rate": 3.0006961290889077e-08, |
|
"logits/chosen": -2.018728494644165, |
|
"logits/rejected": -2.0098907947540283, |
|
"logps/chosen": -1.1194074153900146, |
|
"logps/rejected": -1.2942084074020386, |
|
"loss": 1.5797, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.2388148307800293, |
|
"rewards/margins": 0.3496018946170807, |
|
"rewards/rejected": -2.588416814804077, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.484149855907781, |
|
"grad_norm": 24.66862006119205, |
|
"learning_rate": 2.980142736433833e-08, |
|
"logits/chosen": -2.0064892768859863, |
|
"logits/rejected": -1.9998804330825806, |
|
"logps/chosen": -1.0339871644973755, |
|
"logps/rejected": -1.1035549640655518, |
|
"loss": 1.7141, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.067974328994751, |
|
"rewards/margins": 0.1391356885433197, |
|
"rewards/rejected": -2.2071099281311035, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4913544668587897, |
|
"grad_norm": 28.916182952269878, |
|
"learning_rate": 2.9595555917269997e-08, |
|
"logits/chosen": -2.0349533557891846, |
|
"logits/rejected": -2.020362615585327, |
|
"logps/chosen": -1.1436104774475098, |
|
"logps/rejected": -1.247326135635376, |
|
"loss": 1.6472, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.2872209548950195, |
|
"rewards/margins": 0.2074313908815384, |
|
"rewards/rejected": -2.494652271270752, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 21.48652689473404, |
|
"learning_rate": 2.9389361421596725e-08, |
|
"logits/chosen": -1.9501157999038696, |
|
"logits/rejected": -1.952577829360962, |
|
"logps/chosen": -1.0596842765808105, |
|
"logps/rejected": -1.2010711431503296, |
|
"loss": 1.604, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.119368553161621, |
|
"rewards/margins": 0.2827739119529724, |
|
"rewards/rejected": -2.402142286300659, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.505763688760807, |
|
"grad_norm": 23.45025608625662, |
|
"learning_rate": 2.9182858371940126e-08, |
|
"logits/chosen": -2.0330328941345215, |
|
"logits/rejected": -2.0277328491210938, |
|
"logps/chosen": -1.049903154373169, |
|
"logps/rejected": -1.1856356859207153, |
|
"loss": 1.6123, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.099806308746338, |
|
"rewards/margins": 0.2714654207229614, |
|
"rewards/rejected": -2.3712713718414307, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.5129682997118157, |
|
"grad_norm": 21.70840796820339, |
|
"learning_rate": 2.8976061284611908e-08, |
|
"logits/chosen": -1.9831184148788452, |
|
"logits/rejected": -1.9919109344482422, |
|
"logps/chosen": -0.9365741014480591, |
|
"logps/rejected": -1.0736796855926514, |
|
"loss": 1.6176, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8731482028961182, |
|
"rewards/margins": 0.27421125769615173, |
|
"rewards/rejected": -2.1473593711853027, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.5201729106628243, |
|
"grad_norm": 25.188424132543844, |
|
"learning_rate": 2.8768984696593384e-08, |
|
"logits/chosen": -1.9801161289215088, |
|
"logits/rejected": -1.9709136486053467, |
|
"logps/chosen": -1.0179340839385986, |
|
"logps/rejected": -1.1427983045578003, |
|
"loss": 1.6414, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0358681678771973, |
|
"rewards/margins": 0.24972863495349884, |
|
"rewards/rejected": -2.2855966091156006, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.527377521613833, |
|
"grad_norm": 21.634699280051397, |
|
"learning_rate": 2.8561643164513637e-08, |
|
"logits/chosen": -1.901829481124878, |
|
"logits/rejected": -1.8984495401382446, |
|
"logps/chosen": -1.0501524209976196, |
|
"logps/rejected": -1.173863172531128, |
|
"loss": 1.6306, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1003048419952393, |
|
"rewards/margins": 0.24742154777050018, |
|
"rewards/rejected": -2.347726345062256, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5345821325648417, |
|
"grad_norm": 23.61665024363779, |
|
"learning_rate": 2.8354051263626227e-08, |
|
"logits/chosen": -1.9798393249511719, |
|
"logits/rejected": -1.9855964183807373, |
|
"logps/chosen": -1.0607976913452148, |
|
"logps/rejected": -1.181445837020874, |
|
"loss": 1.6332, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1215953826904297, |
|
"rewards/margins": 0.2412964552640915, |
|
"rewards/rejected": -2.362891674041748, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.54178674351585, |
|
"grad_norm": 23.186120463820025, |
|
"learning_rate": 2.8146223586784573e-08, |
|
"logits/chosen": -1.9751768112182617, |
|
"logits/rejected": -1.9669930934906006, |
|
"logps/chosen": -1.0672011375427246, |
|
"logps/rejected": -1.2088868618011475, |
|
"loss": 1.6089, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.134402275085449, |
|
"rewards/margins": 0.2833711504936218, |
|
"rewards/rejected": -2.417773723602295, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.5489913544668588, |
|
"grad_norm": 30.08589108385336, |
|
"learning_rate": 2.7938174743416205e-08, |
|
"logits/chosen": -1.9352929592132568, |
|
"logits/rejected": -1.93215811252594, |
|
"logps/chosen": -1.0522792339324951, |
|
"logps/rejected": -1.1689748764038086, |
|
"loss": 1.644, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1045584678649902, |
|
"rewards/margins": 0.23339109122753143, |
|
"rewards/rejected": -2.337949752807617, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 23.105545692556433, |
|
"learning_rate": 2.7729919358495728e-08, |
|
"logits/chosen": -1.998682975769043, |
|
"logits/rejected": -1.9997676610946655, |
|
"logps/chosen": -1.1142494678497314, |
|
"logps/rejected": -1.200404167175293, |
|
"loss": 1.6925, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.228498935699463, |
|
"rewards/margins": 0.1723092645406723, |
|
"rewards/rejected": -2.400808334350586, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.563400576368876, |
|
"grad_norm": 22.35367179960909, |
|
"learning_rate": 2.7521472071516772e-08, |
|
"logits/chosen": -1.9958610534667969, |
|
"logits/rejected": -1.9947010278701782, |
|
"logps/chosen": -0.9450345039367676, |
|
"logps/rejected": -1.0654414892196655, |
|
"loss": 1.6368, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.8900690078735352, |
|
"rewards/margins": 0.2408140003681183, |
|
"rewards/rejected": -2.130882978439331, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5706051873198847, |
|
"grad_norm": 25.688467296830364, |
|
"learning_rate": 2.731284753546289e-08, |
|
"logits/chosen": -1.986534833908081, |
|
"logits/rejected": -1.9845256805419922, |
|
"logps/chosen": -1.0848838090896606, |
|
"logps/rejected": -1.2309720516204834, |
|
"loss": 1.5979, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.1697676181793213, |
|
"rewards/margins": 0.29217639565467834, |
|
"rewards/rejected": -2.461944103240967, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5778097982708934, |
|
"grad_norm": 25.40374296681987, |
|
"learning_rate": 2.710406041577751e-08, |
|
"logits/chosen": -2.0504424571990967, |
|
"logits/rejected": -2.047253370285034, |
|
"logps/chosen": -1.0334125757217407, |
|
"logps/rejected": -1.1928437948226929, |
|
"loss": 1.5819, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0668251514434814, |
|
"rewards/margins": 0.31886276602745056, |
|
"rewards/rejected": -2.3856875896453857, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.585014409221902, |
|
"grad_norm": 21.131392600700334, |
|
"learning_rate": 2.6895125389333017e-08, |
|
"logits/chosen": -2.0092031955718994, |
|
"logits/rejected": -2.0050158500671387, |
|
"logps/chosen": -1.0256198644638062, |
|
"logps/rejected": -1.1862627267837524, |
|
"loss": 1.5762, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0512397289276123, |
|
"rewards/margins": 0.3212856650352478, |
|
"rewards/rejected": -2.372525453567505, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5922190201729105, |
|
"grad_norm": 20.15547075877672, |
|
"learning_rate": 2.6686057143399028e-08, |
|
"logits/chosen": -2.0078885555267334, |
|
"logits/rejected": -2.0095622539520264, |
|
"logps/chosen": -1.0626325607299805, |
|
"logps/rejected": -1.167283296585083, |
|
"loss": 1.6767, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.125265121459961, |
|
"rewards/margins": 0.20930185914039612, |
|
"rewards/rejected": -2.334566593170166, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5994236311239192, |
|
"grad_norm": 22.817140386866217, |
|
"learning_rate": 2.647687037460996e-08, |
|
"logits/chosen": -2.009531021118164, |
|
"logits/rejected": -2.009129762649536, |
|
"logps/chosen": -1.0871093273162842, |
|
"logps/rejected": -1.292332649230957, |
|
"loss": 1.5324, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1742186546325684, |
|
"rewards/margins": 0.41044631600379944, |
|
"rewards/rejected": -2.584665298461914, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.6066282420749278, |
|
"grad_norm": 24.027811416370326, |
|
"learning_rate": 2.626757978793187e-08, |
|
"logits/chosen": -2.017061710357666, |
|
"logits/rejected": -2.010791301727295, |
|
"logps/chosen": -1.0892200469970703, |
|
"logps/rejected": -1.2187515497207642, |
|
"loss": 1.6311, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1784400939941406, |
|
"rewards/margins": 0.25906291604042053, |
|
"rewards/rejected": -2.4375030994415283, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 27.47539280675231, |
|
"learning_rate": 2.6058200095628797e-08, |
|
"logits/chosen": -1.9886070489883423, |
|
"logits/rejected": -1.9921098947525024, |
|
"logps/chosen": -0.9186259508132935, |
|
"logps/rejected": -1.098120093345642, |
|
"loss": 1.5554, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.837251901626587, |
|
"rewards/margins": 0.3589881956577301, |
|
"rewards/rejected": -2.196240186691284, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.6210374639769451, |
|
"grad_norm": 22.62589611549699, |
|
"learning_rate": 2.584874601622854e-08, |
|
"logits/chosen": -2.052700996398926, |
|
"logits/rejected": -2.043602228164673, |
|
"logps/chosen": -1.0874931812286377, |
|
"logps/rejected": -1.2247369289398193, |
|
"loss": 1.6309, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1749863624572754, |
|
"rewards/margins": 0.2744874358177185, |
|
"rewards/rejected": -2.4494738578796387, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6282420749279538, |
|
"grad_norm": 25.630077086272028, |
|
"learning_rate": 2.5639232273487993e-08, |
|
"logits/chosen": -1.9821510314941406, |
|
"logits/rejected": -1.972529649734497, |
|
"logps/chosen": -0.9799386262893677, |
|
"logps/rejected": -1.1089693307876587, |
|
"loss": 1.6268, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9598772525787354, |
|
"rewards/margins": 0.2580614686012268, |
|
"rewards/rejected": -2.2179386615753174, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6354466858789625, |
|
"grad_norm": 25.461200932800743, |
|
"learning_rate": 2.5429673595358142e-08, |
|
"logits/chosen": -2.0126545429229736, |
|
"logits/rejected": -2.0112295150756836, |
|
"logps/chosen": -1.0471864938735962, |
|
"logps/rejected": -1.1747767925262451, |
|
"loss": 1.626, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0943729877471924, |
|
"rewards/margins": 0.25518038868904114, |
|
"rewards/rejected": -2.3495535850524902, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6426512968299711, |
|
"grad_norm": 27.724134933705333, |
|
"learning_rate": 2.5220084712948764e-08, |
|
"logits/chosen": -1.9738900661468506, |
|
"logits/rejected": -1.9630699157714844, |
|
"logps/chosen": -1.1224391460418701, |
|
"logps/rejected": -1.2453981637954712, |
|
"loss": 1.6223, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.2448782920837402, |
|
"rewards/margins": 0.24591811001300812, |
|
"rewards/rejected": -2.4907963275909424, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.6498559077809798, |
|
"grad_norm": 22.449014899141954, |
|
"learning_rate": 2.5010480359492838e-08, |
|
"logits/chosen": -1.9610662460327148, |
|
"logits/rejected": -1.9581550359725952, |
|
"logps/chosen": -1.0534158945083618, |
|
"logps/rejected": -1.121906042098999, |
|
"loss": 1.7249, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1068317890167236, |
|
"rewards/margins": 0.13698022067546844, |
|
"rewards/rejected": -2.243812084197998, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.6570605187319885, |
|
"grad_norm": 24.99779388268088, |
|
"learning_rate": 2.480087526931091e-08, |
|
"logits/chosen": -2.0083067417144775, |
|
"logits/rejected": -1.9961440563201904, |
|
"logps/chosen": -1.005535364151001, |
|
"logps/rejected": -1.1289100646972656, |
|
"loss": 1.6406, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.011070728302002, |
|
"rewards/margins": 0.24674968421459198, |
|
"rewards/rejected": -2.2578201293945312, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.6642651296829971, |
|
"grad_norm": 21.986886108291046, |
|
"learning_rate": 2.4591284176775326e-08, |
|
"logits/chosen": -1.967773199081421, |
|
"logits/rejected": -1.9639675617218018, |
|
"logps/chosen": -1.0770162343978882, |
|
"logps/rejected": -1.1647992134094238, |
|
"loss": 1.6916, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.1540324687957764, |
|
"rewards/margins": 0.17556606233119965, |
|
"rewards/rejected": -2.3295984268188477, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 26.838033792582817, |
|
"learning_rate": 2.4381721815274443e-08, |
|
"logits/chosen": -2.0369725227355957, |
|
"logits/rejected": -2.0373129844665527, |
|
"logps/chosen": -1.0236698389053345, |
|
"logps/rejected": -1.163711667060852, |
|
"loss": 1.6162, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.047339677810669, |
|
"rewards/margins": 0.2800835967063904, |
|
"rewards/rejected": -2.327423334121704, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6786743515850144, |
|
"grad_norm": 23.085774593086864, |
|
"learning_rate": 2.4172202916176936e-08, |
|
"logits/chosen": -2.0389699935913086, |
|
"logits/rejected": -2.041323184967041, |
|
"logps/chosen": -0.971706211566925, |
|
"logps/rejected": -1.1475776433944702, |
|
"loss": 1.5717, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.94341242313385, |
|
"rewards/margins": 0.35174277424812317, |
|
"rewards/rejected": -2.2951552867889404, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.685878962536023, |
|
"grad_norm": 22.57399934210468, |
|
"learning_rate": 2.3962742207796268e-08, |
|
"logits/chosen": -1.9822057485580444, |
|
"logits/rejected": -1.9802274703979492, |
|
"logps/chosen": -0.9580032229423523, |
|
"logps/rejected": -1.131255865097046, |
|
"loss": 1.572, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9160064458847046, |
|
"rewards/margins": 0.3465050458908081, |
|
"rewards/rejected": -2.262511730194092, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6930835734870318, |
|
"grad_norm": 26.337436430641183, |
|
"learning_rate": 2.3753354414355334e-08, |
|
"logits/chosen": -1.9447540044784546, |
|
"logits/rejected": -1.9341723918914795, |
|
"logps/chosen": -1.0690467357635498, |
|
"logps/rejected": -1.191696047782898, |
|
"loss": 1.6426, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1380934715270996, |
|
"rewards/margins": 0.24529826641082764, |
|
"rewards/rejected": -2.383392095565796, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.7002881844380404, |
|
"grad_norm": 21.802361149754983, |
|
"learning_rate": 2.3544054254951408e-08, |
|
"logits/chosen": -1.9793498516082764, |
|
"logits/rejected": -1.97073233127594, |
|
"logps/chosen": -0.9383029937744141, |
|
"logps/rejected": -1.1460812091827393, |
|
"loss": 1.5183, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.8766059875488281, |
|
"rewards/margins": 0.4155563414096832, |
|
"rewards/rejected": -2.2921624183654785, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.707492795389049, |
|
"grad_norm": 21.6845132948567, |
|
"learning_rate": 2.3334856442521435e-08, |
|
"logits/chosen": -2.0341391563415527, |
|
"logits/rejected": -2.0266478061676025, |
|
"logps/chosen": -1.0988128185272217, |
|
"logps/rejected": -1.1753710508346558, |
|
"loss": 1.707, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1976256370544434, |
|
"rewards/margins": 0.15311647951602936, |
|
"rewards/rejected": -2.3507421016693115, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.7146974063400577, |
|
"grad_norm": 22.160651360455354, |
|
"learning_rate": 2.3125775682807826e-08, |
|
"logits/chosen": -2.0457205772399902, |
|
"logits/rejected": -2.045448064804077, |
|
"logps/chosen": -1.1700584888458252, |
|
"logps/rejected": -1.2783877849578857, |
|
"loss": 1.6602, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.3401169776916504, |
|
"rewards/margins": 0.21665871143341064, |
|
"rewards/rejected": -2.5567755699157715, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.7219020172910664, |
|
"grad_norm": 24.211503707909046, |
|
"learning_rate": 2.291682667332464e-08, |
|
"logits/chosen": -2.0610098838806152, |
|
"logits/rejected": -2.056089401245117, |
|
"logps/chosen": -1.0519418716430664, |
|
"logps/rejected": -1.1878581047058105, |
|
"loss": 1.6179, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.103883743286133, |
|
"rewards/margins": 0.2718324065208435, |
|
"rewards/rejected": -2.375716209411621, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 19.198310748892013, |
|
"learning_rate": 2.2708024102324454e-08, |
|
"logits/chosen": -2.0232059955596924, |
|
"logits/rejected": -2.0173497200012207, |
|
"logps/chosen": -1.0365447998046875, |
|
"logps/rejected": -1.2240616083145142, |
|
"loss": 1.5606, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.073089599609375, |
|
"rewards/margins": 0.3750336468219757, |
|
"rewards/rejected": -2.4481232166290283, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7363112391930837, |
|
"grad_norm": 26.19091650904539, |
|
"learning_rate": 2.2499382647765797e-08, |
|
"logits/chosen": -2.0218329429626465, |
|
"logits/rejected": -2.01828932762146, |
|
"logps/chosen": -1.0763471126556396, |
|
"logps/rejected": -1.1715179681777954, |
|
"loss": 1.6805, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1526942253112793, |
|
"rewards/margins": 0.19034144282341003, |
|
"rewards/rejected": -2.343035936355591, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7435158501440924, |
|
"grad_norm": 25.129161862706873, |
|
"learning_rate": 2.2290916976281427e-08, |
|
"logits/chosen": -2.001932144165039, |
|
"logits/rejected": -1.9959653615951538, |
|
"logps/chosen": -1.0039644241333008, |
|
"logps/rejected": -1.14584219455719, |
|
"loss": 1.6336, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0079288482666016, |
|
"rewards/margins": 0.2837551236152649, |
|
"rewards/rejected": -2.29168438911438, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7507204610951008, |
|
"grad_norm": 21.814621079989646, |
|
"learning_rate": 2.2082641742147238e-08, |
|
"logits/chosen": -1.9791107177734375, |
|
"logits/rejected": -1.9726155996322632, |
|
"logps/chosen": -1.0205609798431396, |
|
"logps/rejected": -1.221226453781128, |
|
"loss": 1.5273, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0411219596862793, |
|
"rewards/margins": 0.40133076906204224, |
|
"rewards/rejected": -2.442452907562256, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7579250720461095, |
|
"grad_norm": 23.142401741726285, |
|
"learning_rate": 2.1874571586252177e-08, |
|
"logits/chosen": -2.0248005390167236, |
|
"logits/rejected": -2.0180201530456543, |
|
"logps/chosen": -1.032070279121399, |
|
"logps/rejected": -1.116600513458252, |
|
"loss": 1.6925, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.064140558242798, |
|
"rewards/margins": 0.1690603494644165, |
|
"rewards/rejected": -2.233201026916504, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.7651296829971181, |
|
"grad_norm": 23.772650056451177, |
|
"learning_rate": 2.1666721135069037e-08, |
|
"logits/chosen": -2.0153000354766846, |
|
"logits/rejected": -2.0120010375976562, |
|
"logps/chosen": -1.1153454780578613, |
|
"logps/rejected": -1.216801404953003, |
|
"loss": 1.6773, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.2306909561157227, |
|
"rewards/margins": 0.20291194319725037, |
|
"rewards/rejected": -2.433602809906006, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7723342939481268, |
|
"grad_norm": 18.806401369499547, |
|
"learning_rate": 2.145910499962628e-08, |
|
"logits/chosen": -2.0624032020568848, |
|
"logits/rejected": -2.054765224456787, |
|
"logps/chosen": -0.9628511667251587, |
|
"logps/rejected": -1.1157209873199463, |
|
"loss": 1.6013, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9257023334503174, |
|
"rewards/margins": 0.30573925375938416, |
|
"rewards/rejected": -2.2314419746398926, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7795389048991355, |
|
"grad_norm": 26.89395254493913, |
|
"learning_rate": 2.1251737774480915e-08, |
|
"logits/chosen": -2.0389955043792725, |
|
"logits/rejected": -2.029460906982422, |
|
"logps/chosen": -1.1734710931777954, |
|
"logps/rejected": -1.2716848850250244, |
|
"loss": 1.7022, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.346942186355591, |
|
"rewards/margins": 0.19642747938632965, |
|
"rewards/rejected": -2.543369770050049, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 20.139202633479794, |
|
"learning_rate": 2.104463403669264e-08, |
|
"logits/chosen": -1.9957828521728516, |
|
"logits/rejected": -1.9933998584747314, |
|
"logps/chosen": -1.0485862493515015, |
|
"logps/rejected": -1.203293800354004, |
|
"loss": 1.5981, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.097172498703003, |
|
"rewards/margins": 0.3094151020050049, |
|
"rewards/rejected": -2.406587600708008, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7939481268011528, |
|
"grad_norm": 20.172677879731765, |
|
"learning_rate": 2.0837808344799028e-08, |
|
"logits/chosen": -1.9755080938339233, |
|
"logits/rejected": -1.971238136291504, |
|
"logps/chosen": -0.9422762989997864, |
|
"logps/rejected": -1.0841569900512695, |
|
"loss": 1.604, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.8845525979995728, |
|
"rewards/margins": 0.28376150131225586, |
|
"rewards/rejected": -2.168313980102539, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8011527377521612, |
|
"grad_norm": 22.349389968529955, |
|
"learning_rate": 2.063127523779219e-08, |
|
"logits/chosen": -1.9753835201263428, |
|
"logits/rejected": -1.9711145162582397, |
|
"logps/chosen": -1.0105019807815552, |
|
"logps/rejected": -1.208362340927124, |
|
"loss": 1.5202, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.0210039615631104, |
|
"rewards/margins": 0.3957210183143616, |
|
"rewards/rejected": -2.416724681854248, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8083573487031699, |
|
"grad_norm": 24.22087521172144, |
|
"learning_rate": 2.0425049234096737e-08, |
|
"logits/chosen": -1.9858808517456055, |
|
"logits/rejected": -1.9803756475448608, |
|
"logps/chosen": -1.0131169557571411, |
|
"logps/rejected": -1.1394225358963013, |
|
"loss": 1.6405, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0262339115142822, |
|
"rewards/margins": 0.2526114284992218, |
|
"rewards/rejected": -2.2788450717926025, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.8155619596541785, |
|
"grad_norm": 22.958910412289878, |
|
"learning_rate": 2.0219144830549163e-08, |
|
"logits/chosen": -1.9544498920440674, |
|
"logits/rejected": -1.9534263610839844, |
|
"logps/chosen": -1.0182510614395142, |
|
"logps/rejected": -1.177302598953247, |
|
"loss": 1.5993, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0365021228790283, |
|
"rewards/margins": 0.31810325384140015, |
|
"rewards/rejected": -2.354605197906494, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.8227665706051872, |
|
"grad_norm": 21.62907044306249, |
|
"learning_rate": 2.0013576501378823e-08, |
|
"logits/chosen": -1.9767091274261475, |
|
"logits/rejected": -1.9701858758926392, |
|
"logps/chosen": -1.0102381706237793, |
|
"logps/rejected": -1.1591678857803345, |
|
"loss": 1.6085, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0204763412475586, |
|
"rewards/margins": 0.2978593409061432, |
|
"rewards/rejected": -2.318335771560669, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8299711815561959, |
|
"grad_norm": 24.093400959011806, |
|
"learning_rate": 1.9808358697190426e-08, |
|
"logits/chosen": -1.9653217792510986, |
|
"logits/rejected": -1.9615375995635986, |
|
"logps/chosen": -0.936444878578186, |
|
"logps/rejected": -1.0819050073623657, |
|
"loss": 1.6163, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.872889757156372, |
|
"rewards/margins": 0.2909203767776489, |
|
"rewards/rejected": -2.1638100147247314, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8371757925072045, |
|
"grad_norm": 25.800243545689497, |
|
"learning_rate": 1.9603505843948214e-08, |
|
"logits/chosen": -2.0147223472595215, |
|
"logits/rejected": -2.004798412322998, |
|
"logps/chosen": -0.9533373713493347, |
|
"logps/rejected": -1.1323670148849487, |
|
"loss": 1.5547, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.9066747426986694, |
|
"rewards/margins": 0.3580593168735504, |
|
"rewards/rejected": -2.2647340297698975, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 24.254590998607096, |
|
"learning_rate": 1.9399032341961886e-08, |
|
"logits/chosen": -1.9753398895263672, |
|
"logits/rejected": -1.9594964981079102, |
|
"logps/chosen": -0.9942548871040344, |
|
"logps/rejected": -1.0747997760772705, |
|
"loss": 1.7057, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.9885097742080688, |
|
"rewards/margins": 0.1610899567604065, |
|
"rewards/rejected": -2.149599552154541, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.8515850144092219, |
|
"grad_norm": 30.793726543288717, |
|
"learning_rate": 1.9194952564874323e-08, |
|
"logits/chosen": -2.0176210403442383, |
|
"logits/rejected": -2.011768341064453, |
|
"logps/chosen": -1.0701502561569214, |
|
"logps/rejected": -1.2221211194992065, |
|
"loss": 1.5861, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1403005123138428, |
|
"rewards/margins": 0.30394163727760315, |
|
"rewards/rejected": -2.444242238998413, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.8587896253602305, |
|
"grad_norm": 23.34142631480501, |
|
"learning_rate": 1.8991280858651157e-08, |
|
"logits/chosen": -1.974826455116272, |
|
"logits/rejected": -1.968951940536499, |
|
"logps/chosen": -1.0685060024261475, |
|
"logps/rejected": -1.1608997583389282, |
|
"loss": 1.684, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.137012004852295, |
|
"rewards/margins": 0.1847873032093048, |
|
"rewards/rejected": -2.3217995166778564, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8659942363112392, |
|
"grad_norm": 20.078022144210134, |
|
"learning_rate": 1.8788031540572327e-08, |
|
"logits/chosen": -1.9810216426849365, |
|
"logits/rejected": -1.9728477001190186, |
|
"logps/chosen": -1.0035111904144287, |
|
"logps/rejected": -1.1611225605010986, |
|
"loss": 1.5875, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0070223808288574, |
|
"rewards/margins": 0.31522226333618164, |
|
"rewards/rejected": -2.3222451210021973, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8731988472622478, |
|
"grad_norm": 20.268887631284258, |
|
"learning_rate": 1.858521889822565e-08, |
|
"logits/chosen": -1.9952818155288696, |
|
"logits/rejected": -1.9979689121246338, |
|
"logps/chosen": -0.977008044719696, |
|
"logps/rejected": -1.093328595161438, |
|
"loss": 1.6528, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.954016089439392, |
|
"rewards/margins": 0.23264119029045105, |
|
"rewards/rejected": -2.186657190322876, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.8804034582132565, |
|
"grad_norm": 20.65067514824257, |
|
"learning_rate": 1.8382857188502422e-08, |
|
"logits/chosen": -1.9850199222564697, |
|
"logits/rejected": -1.9801909923553467, |
|
"logps/chosen": -0.9883711934089661, |
|
"logps/rejected": -1.1270935535430908, |
|
"loss": 1.6028, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9767423868179321, |
|
"rewards/margins": 0.2774447500705719, |
|
"rewards/rejected": -2.2541871070861816, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8876080691642652, |
|
"grad_norm": 26.07557235786858, |
|
"learning_rate": 1.8180960636595234e-08, |
|
"logits/chosen": -1.9607089757919312, |
|
"logits/rejected": -1.9584417343139648, |
|
"logps/chosen": -1.0416334867477417, |
|
"logps/rejected": -1.1955784559249878, |
|
"loss": 1.5974, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0832669734954834, |
|
"rewards/margins": 0.3078901767730713, |
|
"rewards/rejected": -2.3911569118499756, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8948126801152738, |
|
"grad_norm": 23.473893472630195, |
|
"learning_rate": 1.7979543434998015e-08, |
|
"logits/chosen": -2.029837131500244, |
|
"logits/rejected": -2.034827709197998, |
|
"logps/chosen": -1.1301052570343018, |
|
"logps/rejected": -1.2223666906356812, |
|
"loss": 1.6764, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.2602105140686035, |
|
"rewards/margins": 0.18452298641204834, |
|
"rewards/rejected": -2.4447333812713623, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 31.463222217407818, |
|
"learning_rate": 1.7778619742508345e-08, |
|
"logits/chosen": -1.9924646615982056, |
|
"logits/rejected": -1.9858839511871338, |
|
"logps/chosen": -1.1004579067230225, |
|
"logps/rejected": -1.2025467157363892, |
|
"loss": 1.6833, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.200915813446045, |
|
"rewards/margins": 0.20417769253253937, |
|
"rewards/rejected": -2.4050934314727783, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.9092219020172911, |
|
"grad_norm": 27.42682717849318, |
|
"learning_rate": 1.757820368323213e-08, |
|
"logits/chosen": -1.9845936298370361, |
|
"logits/rejected": -1.9750339984893799, |
|
"logps/chosen": -1.110480785369873, |
|
"logps/rejected": -1.2791945934295654, |
|
"loss": 1.5742, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.220961570739746, |
|
"rewards/margins": 0.33742767572402954, |
|
"rewards/rejected": -2.558389186859131, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.9164265129682998, |
|
"grad_norm": 25.747238627810656, |
|
"learning_rate": 1.7378309345590803e-08, |
|
"logits/chosen": -2.001929759979248, |
|
"logits/rejected": -2.0115175247192383, |
|
"logps/chosen": -1.091799020767212, |
|
"logps/rejected": -1.242598533630371, |
|
"loss": 1.6023, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.183598041534424, |
|
"rewards/margins": 0.30159884691238403, |
|
"rewards/rejected": -2.485197067260742, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.9236311239193085, |
|
"grad_norm": 23.56149851612972, |
|
"learning_rate": 1.717895078133088e-08, |
|
"logits/chosen": -2.049691915512085, |
|
"logits/rejected": -2.0456790924072266, |
|
"logps/chosen": -1.0638725757598877, |
|
"logps/rejected": -1.2155239582061768, |
|
"loss": 1.5985, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1277451515197754, |
|
"rewards/margins": 0.3033028244972229, |
|
"rewards/rejected": -2.4310479164123535, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.9308357348703171, |
|
"grad_norm": 25.342673498874493, |
|
"learning_rate": 1.698014200453624e-08, |
|
"logits/chosen": -2.0035059452056885, |
|
"logits/rejected": -2.0112483501434326, |
|
"logps/chosen": -1.036697506904602, |
|
"logps/rejected": -1.171596884727478, |
|
"loss": 1.6083, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.073395013809204, |
|
"rewards/margins": 0.26979896426200867, |
|
"rewards/rejected": -2.343193769454956, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9380403458213258, |
|
"grad_norm": 29.2866917522338, |
|
"learning_rate": 1.6781896990642964e-08, |
|
"logits/chosen": -1.9394210577011108, |
|
"logits/rejected": -1.9367926120758057, |
|
"logps/chosen": -1.1517702341079712, |
|
"logps/rejected": -1.2503753900527954, |
|
"loss": 1.6757, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.3035404682159424, |
|
"rewards/margins": 0.19721055030822754, |
|
"rewards/rejected": -2.500750780105591, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9452449567723344, |
|
"grad_norm": 27.895712891853, |
|
"learning_rate": 1.658422967545693e-08, |
|
"logits/chosen": -2.044224262237549, |
|
"logits/rejected": -2.031148672103882, |
|
"logps/chosen": -1.0125457048416138, |
|
"logps/rejected": -1.1346945762634277, |
|
"loss": 1.6456, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.0250914096832275, |
|
"rewards/margins": 0.24429789185523987, |
|
"rewards/rejected": -2.2693891525268555, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.952449567723343, |
|
"grad_norm": 25.265909243180825, |
|
"learning_rate": 1.638715395417418e-08, |
|
"logits/chosen": -2.0184195041656494, |
|
"logits/rejected": -2.016491413116455, |
|
"logps/chosen": -1.0754286050796509, |
|
"logps/rejected": -1.220522165298462, |
|
"loss": 1.6055, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1508572101593018, |
|
"rewards/margins": 0.2901870906352997, |
|
"rewards/rejected": -2.441044330596924, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 26.14092408303687, |
|
"learning_rate": 1.619068368040416e-08, |
|
"logits/chosen": -2.014909267425537, |
|
"logits/rejected": -2.0107204914093018, |
|
"logps/chosen": -1.0064074993133545, |
|
"logps/rejected": -1.1946723461151123, |
|
"loss": 1.5392, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.012814998626709, |
|
"rewards/margins": 0.37652960419654846, |
|
"rewards/rejected": -2.3893446922302246, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.9668587896253602, |
|
"grad_norm": 20.308279158480953, |
|
"learning_rate": 1.5994832665195853e-08, |
|
"logits/chosen": -1.9590470790863037, |
|
"logits/rejected": -1.9595496654510498, |
|
"logps/chosen": -1.0389412641525269, |
|
"logps/rejected": -1.1585092544555664, |
|
"loss": 1.6408, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0778825283050537, |
|
"rewards/margins": 0.23913617432117462, |
|
"rewards/rejected": -2.317018508911133, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9740634005763689, |
|
"grad_norm": 24.52856159698312, |
|
"learning_rate": 1.5799614676066906e-08, |
|
"logits/chosen": -2.0611324310302734, |
|
"logits/rejected": -2.0581214427948, |
|
"logps/chosen": -0.9563993215560913, |
|
"logps/rejected": -1.104172706604004, |
|
"loss": 1.5929, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.9127986431121826, |
|
"rewards/margins": 0.2955467998981476, |
|
"rewards/rejected": -2.208345413208008, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.9812680115273775, |
|
"grad_norm": 18.949757053781095, |
|
"learning_rate": 1.560504343603587e-08, |
|
"logits/chosen": -1.973333716392517, |
|
"logits/rejected": -1.9741255044937134, |
|
"logps/chosen": -1.0730575323104858, |
|
"logps/rejected": -1.2376629114151, |
|
"loss": 1.576, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.1461150646209717, |
|
"rewards/margins": 0.32921046018600464, |
|
"rewards/rejected": -2.4753258228302, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9884726224783862, |
|
"grad_norm": 22.302877835841354, |
|
"learning_rate": 1.541113262265748e-08, |
|
"logits/chosen": -2.061547040939331, |
|
"logits/rejected": -2.0595688819885254, |
|
"logps/chosen": -1.0353928804397583, |
|
"logps/rejected": -1.1622211933135986, |
|
"loss": 1.6306, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0707857608795166, |
|
"rewards/margins": 0.2536565363407135, |
|
"rewards/rejected": -2.3244423866271973, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.9956772334293948, |
|
"grad_norm": 30.115588938614223, |
|
"learning_rate": 1.5217895867061227e-08, |
|
"logits/chosen": -2.0031933784484863, |
|
"logits/rejected": -1.9973207712173462, |
|
"logps/chosen": -1.0904591083526611, |
|
"logps/rejected": -1.1972274780273438, |
|
"loss": 1.6758, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.1809182167053223, |
|
"rewards/margins": 0.21353654563426971, |
|
"rewards/rejected": -2.3944549560546875, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0028818443804033, |
|
"grad_norm": 26.508187485308607, |
|
"learning_rate": 1.5025346752993098e-08, |
|
"logits/chosen": -1.9899861812591553, |
|
"logits/rejected": -1.9919507503509521, |
|
"logps/chosen": -1.0795637369155884, |
|
"logps/rejected": -1.2126731872558594, |
|
"loss": 1.6285, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.1591274738311768, |
|
"rewards/margins": 0.26621872186660767, |
|
"rewards/rejected": -2.4253463745117188, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.010086455331412, |
|
"grad_norm": 28.64222881063969, |
|
"learning_rate": 1.4833498815860756e-08, |
|
"logits/chosen": -2.044374942779541, |
|
"logits/rejected": -2.0468382835388184, |
|
"logps/chosen": -1.0054826736450195, |
|
"logps/rejected": -1.2021020650863647, |
|
"loss": 1.5559, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.010965347290039, |
|
"rewards/margins": 0.3932386338710785, |
|
"rewards/rejected": -2.4042041301727295, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.0172910662824206, |
|
"grad_norm": 21.462192948449626, |
|
"learning_rate": 1.4642365541781993e-08, |
|
"logits/chosen": -1.9534351825714111, |
|
"logits/rejected": -1.9449405670166016, |
|
"logps/chosen": -1.0361294746398926, |
|
"logps/rejected": -1.2111116647720337, |
|
"loss": 1.5658, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.072258949279785, |
|
"rewards/margins": 0.34996432065963745, |
|
"rewards/rejected": -2.4222233295440674, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.0244956772334293, |
|
"grad_norm": 20.76859057795762, |
|
"learning_rate": 1.4451960366636745e-08, |
|
"logits/chosen": -2.0219428539276123, |
|
"logits/rejected": -2.033188819885254, |
|
"logps/chosen": -1.0458028316497803, |
|
"logps/rejected": -1.1884043216705322, |
|
"loss": 1.605, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0916056632995605, |
|
"rewards/margins": 0.28520235419273376, |
|
"rewards/rejected": -2.3768086433410645, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.031700288184438, |
|
"grad_norm": 23.353169437189916, |
|
"learning_rate": 1.4262296675122592e-08, |
|
"logits/chosen": -2.0090036392211914, |
|
"logits/rejected": -2.005481243133545, |
|
"logps/chosen": -1.0371921062469482, |
|
"logps/rejected": -1.2073792219161987, |
|
"loss": 1.5646, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0743842124938965, |
|
"rewards/margins": 0.3403744101524353, |
|
"rewards/rejected": -2.4147584438323975, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.0389048991354466, |
|
"grad_norm": 23.121114498464102, |
|
"learning_rate": 1.407338779981389e-08, |
|
"logits/chosen": -1.9874942302703857, |
|
"logits/rejected": -1.9855642318725586, |
|
"logps/chosen": -0.9219571948051453, |
|
"logps/rejected": -1.1140168905258179, |
|
"loss": 1.5213, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.8439143896102905, |
|
"rewards/margins": 0.3841188848018646, |
|
"rewards/rejected": -2.2280337810516357, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.0461095100864553, |
|
"grad_norm": 25.25341321880136, |
|
"learning_rate": 1.3885247020224534e-08, |
|
"logits/chosen": -1.9960615634918213, |
|
"logits/rejected": -1.9914722442626953, |
|
"logps/chosen": -1.009881854057312, |
|
"logps/rejected": -1.1496226787567139, |
|
"loss": 1.6128, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.019763708114624, |
|
"rewards/margins": 0.2794816493988037, |
|
"rewards/rejected": -2.2992453575134277, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.053314121037464, |
|
"grad_norm": 20.971301133081738, |
|
"learning_rate": 1.369788756187445e-08, |
|
"logits/chosen": -2.0032501220703125, |
|
"logits/rejected": -2.0000967979431152, |
|
"logps/chosen": -1.0347437858581543, |
|
"logps/rejected": -1.1370530128479004, |
|
"loss": 1.6687, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.0694875717163086, |
|
"rewards/margins": 0.20461849868297577, |
|
"rewards/rejected": -2.274106025695801, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.0605187319884726, |
|
"grad_norm": 21.9775203889279, |
|
"learning_rate": 1.3511322595359925e-08, |
|
"logits/chosen": -2.033581256866455, |
|
"logits/rejected": -2.0254645347595215, |
|
"logps/chosen": -0.944128692150116, |
|
"logps/rejected": -1.125150442123413, |
|
"loss": 1.5476, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.888257384300232, |
|
"rewards/margins": 0.3620434105396271, |
|
"rewards/rejected": -2.250300884246826, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.0677233429394812, |
|
"grad_norm": 21.063191206550247, |
|
"learning_rate": 1.3325565235427716e-08, |
|
"logits/chosen": -2.0236756801605225, |
|
"logits/rejected": -2.0223705768585205, |
|
"logps/chosen": -0.9903309941291809, |
|
"logps/rejected": -1.145707607269287, |
|
"loss": 1.5929, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.9806619882583618, |
|
"rewards/margins": 0.31075319647789, |
|
"rewards/rejected": -2.291415214538574, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.07492795389049, |
|
"grad_norm": 21.584640327110584, |
|
"learning_rate": 1.3140628540053218e-08, |
|
"logits/chosen": -1.986659049987793, |
|
"logits/rejected": -1.9890410900115967, |
|
"logps/chosen": -0.9793311953544617, |
|
"logps/rejected": -1.123835802078247, |
|
"loss": 1.6021, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9586623907089233, |
|
"rewards/margins": 0.289009153842926, |
|
"rewards/rejected": -2.247671604156494, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.0821325648414986, |
|
"grad_norm": 22.942871845377027, |
|
"learning_rate": 1.2956525509522451e-08, |
|
"logits/chosen": -1.971453309059143, |
|
"logits/rejected": -1.971011757850647, |
|
"logps/chosen": -1.1158349514007568, |
|
"logps/rejected": -1.2264450788497925, |
|
"loss": 1.6636, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.2316699028015137, |
|
"rewards/margins": 0.22122013568878174, |
|
"rewards/rejected": -2.452890157699585, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.089337175792507, |
|
"grad_norm": 23.820159972911735, |
|
"learning_rate": 1.2773269085518267e-08, |
|
"logits/chosen": -2.0029759407043457, |
|
"logits/rejected": -2.004696846008301, |
|
"logps/chosen": -1.0825508832931519, |
|
"logps/rejected": -1.220155119895935, |
|
"loss": 1.6086, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1651017665863037, |
|
"rewards/margins": 0.2752082049846649, |
|
"rewards/rejected": -2.44031023979187, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.096541786743516, |
|
"grad_norm": 26.46761426332852, |
|
"learning_rate": 1.2590872150210574e-08, |
|
"logits/chosen": -2.0609946250915527, |
|
"logits/rejected": -2.054304599761963, |
|
"logps/chosen": -1.067638635635376, |
|
"logps/rejected": -1.1893014907836914, |
|
"loss": 1.6485, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.135277271270752, |
|
"rewards/margins": 0.24332574009895325, |
|
"rewards/rejected": -2.378602981567383, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.1037463976945245, |
|
"grad_norm": 24.717381658629428, |
|
"learning_rate": 1.2409347525350775e-08, |
|
"logits/chosen": -2.024118185043335, |
|
"logits/rejected": -2.014434337615967, |
|
"logps/chosen": -1.114039659500122, |
|
"logps/rejected": -1.2736414670944214, |
|
"loss": 1.5805, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.228079319000244, |
|
"rewards/margins": 0.31920376420021057, |
|
"rewards/rejected": -2.5472829341888428, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.110951008645533, |
|
"grad_norm": 26.147515087768657, |
|
"learning_rate": 1.2228707971370421e-08, |
|
"logits/chosen": -2.016010284423828, |
|
"logits/rejected": -2.009105682373047, |
|
"logps/chosen": -0.9981368184089661, |
|
"logps/rejected": -1.1219263076782227, |
|
"loss": 1.6463, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9962736368179321, |
|
"rewards/margins": 0.24757930636405945, |
|
"rewards/rejected": -2.2438526153564453, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.118155619596542, |
|
"grad_norm": 25.906757042346783, |
|
"learning_rate": 1.2048966186484282e-08, |
|
"logits/chosen": -2.010725498199463, |
|
"logits/rejected": -1.994248628616333, |
|
"logps/chosen": -1.1241233348846436, |
|
"logps/rejected": -1.2454028129577637, |
|
"loss": 1.6413, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.248246669769287, |
|
"rewards/margins": 0.2425590455532074, |
|
"rewards/rejected": -2.4908056259155273, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.1253602305475505, |
|
"grad_norm": 34.07975709431921, |
|
"learning_rate": 1.187013480579762e-08, |
|
"logits/chosen": -2.0079312324523926, |
|
"logits/rejected": -2.010749340057373, |
|
"logps/chosen": -1.0515167713165283, |
|
"logps/rejected": -1.1958991289138794, |
|
"loss": 1.6215, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1030335426330566, |
|
"rewards/margins": 0.2887645661830902, |
|
"rewards/rejected": -2.391798257827759, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.132564841498559, |
|
"grad_norm": 48.68658598001818, |
|
"learning_rate": 1.1692226400418073e-08, |
|
"logits/chosen": -1.9440683126449585, |
|
"logits/rejected": -1.9426988363265991, |
|
"logps/chosen": -1.0936279296875, |
|
"logps/rejected": -1.2332271337509155, |
|
"loss": 1.6411, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.187255859375, |
|
"rewards/margins": 0.27919843792915344, |
|
"rewards/rejected": -2.466454267501831, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.139769452449568, |
|
"grad_norm": 20.37382547923442, |
|
"learning_rate": 1.1515253476571923e-08, |
|
"logits/chosen": -1.9710067510604858, |
|
"logits/rejected": -1.9651463031768799, |
|
"logps/chosen": -1.017547845840454, |
|
"logps/rejected": -1.2117187976837158, |
|
"loss": 1.5272, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.035095691680908, |
|
"rewards/margins": 0.3883420526981354, |
|
"rewards/rejected": -2.4234375953674316, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.1469740634005765, |
|
"grad_norm": 23.62593123554341, |
|
"learning_rate": 1.133922847472496e-08, |
|
"logits/chosen": -1.9900035858154297, |
|
"logits/rejected": -1.9909718036651611, |
|
"logps/chosen": -1.1151740550994873, |
|
"logps/rejected": -1.224129319190979, |
|
"loss": 1.6777, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.2303481101989746, |
|
"rewards/margins": 0.2179100066423416, |
|
"rewards/rejected": -2.448258638381958, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.154178674351585, |
|
"grad_norm": 27.11626352654653, |
|
"learning_rate": 1.1164163768707952e-08, |
|
"logits/chosen": -1.9934008121490479, |
|
"logits/rejected": -1.9880987405776978, |
|
"logps/chosen": -1.01137375831604, |
|
"logps/rejected": -1.1608995199203491, |
|
"loss": 1.6045, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.02274751663208, |
|
"rewards/margins": 0.29905155301094055, |
|
"rewards/rejected": -2.3217990398406982, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.161383285302594, |
|
"grad_norm": 21.186985987415447, |
|
"learning_rate": 1.0990071664846861e-08, |
|
"logits/chosen": -1.9735314846038818, |
|
"logits/rejected": -1.972663164138794, |
|
"logps/chosen": -1.0251822471618652, |
|
"logps/rejected": -1.2192916870117188, |
|
"loss": 1.5629, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0503644943237305, |
|
"rewards/margins": 0.38821902871131897, |
|
"rewards/rejected": -2.4385833740234375, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.1685878962536025, |
|
"grad_norm": 22.398067779662192, |
|
"learning_rate": 1.0816964401097739e-08, |
|
"logits/chosen": -1.9556314945220947, |
|
"logits/rejected": -1.9524948596954346, |
|
"logps/chosen": -0.9635698199272156, |
|
"logps/rejected": -1.0958101749420166, |
|
"loss": 1.6309, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9271396398544312, |
|
"rewards/margins": 0.2644805312156677, |
|
"rewards/rejected": -2.191620349884033, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.175792507204611, |
|
"grad_norm": 24.597936445600215, |
|
"learning_rate": 1.0644854146186406e-08, |
|
"logits/chosen": -2.0202081203460693, |
|
"logits/rejected": -2.0140926837921143, |
|
"logps/chosen": -1.034292459487915, |
|
"logps/rejected": -1.203952431678772, |
|
"loss": 1.5761, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.06858491897583, |
|
"rewards/margins": 0.33932000398635864, |
|
"rewards/rejected": -2.407904863357544, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.18299711815562, |
|
"grad_norm": 22.562346833112592, |
|
"learning_rate": 1.0473752998753114e-08, |
|
"logits/chosen": -1.9993441104888916, |
|
"logits/rejected": -1.9910335540771484, |
|
"logps/chosen": -1.0249227285385132, |
|
"logps/rejected": -1.1977559328079224, |
|
"loss": 1.5632, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0498454570770264, |
|
"rewards/margins": 0.3456666171550751, |
|
"rewards/rejected": -2.3955118656158447, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.1902017291066285, |
|
"grad_norm": 23.766859260147648, |
|
"learning_rate": 1.030367298650201e-08, |
|
"logits/chosen": -2.01485013961792, |
|
"logits/rejected": -2.0149147510528564, |
|
"logps/chosen": -1.0467994213104248, |
|
"logps/rejected": -1.208519697189331, |
|
"loss": 1.5783, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0935988426208496, |
|
"rewards/margins": 0.32344070076942444, |
|
"rewards/rejected": -2.417039394378662, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.1974063400576367, |
|
"grad_norm": 25.521958874894615, |
|
"learning_rate": 1.0134626065355675e-08, |
|
"logits/chosen": -2.0649075508117676, |
|
"logits/rejected": -2.0617613792419434, |
|
"logps/chosen": -1.0284671783447266, |
|
"logps/rejected": -1.1858323812484741, |
|
"loss": 1.6044, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.056934356689453, |
|
"rewards/margins": 0.31473028659820557, |
|
"rewards/rejected": -2.3716647624969482, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.2046109510086453, |
|
"grad_norm": 23.646119810658234, |
|
"learning_rate": 9.966624118614611e-09, |
|
"logits/chosen": -2.0089523792266846, |
|
"logits/rejected": -2.004260778427124, |
|
"logps/chosen": -1.0687427520751953, |
|
"logps/rejected": -1.2249877452850342, |
|
"loss": 1.6068, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1374855041503906, |
|
"rewards/margins": 0.31248974800109863, |
|
"rewards/rejected": -2.4499754905700684, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.211815561959654, |
|
"grad_norm": 18.119200799487714, |
|
"learning_rate": 9.799678956121976e-09, |
|
"logits/chosen": -1.9656593799591064, |
|
"logits/rejected": -1.961615800857544, |
|
"logps/chosen": -1.0386877059936523, |
|
"logps/rejected": -1.1543315649032593, |
|
"loss": 1.6312, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0773754119873047, |
|
"rewards/margins": 0.2312876284122467, |
|
"rewards/rejected": -2.3086631298065186, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.2190201729106627, |
|
"grad_norm": 27.943065266424814, |
|
"learning_rate": 9.633802313433314e-09, |
|
"logits/chosen": -1.9396989345550537, |
|
"logits/rejected": -1.9456799030303955, |
|
"logps/chosen": -1.026071310043335, |
|
"logps/rejected": -1.1421881914138794, |
|
"loss": 1.6325, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.05214262008667, |
|
"rewards/margins": 0.23223355412483215, |
|
"rewards/rejected": -2.284376382827759, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.2262247838616713, |
|
"grad_norm": 24.15226654416461, |
|
"learning_rate": 9.469005850991705e-09, |
|
"logits/chosen": -2.003962278366089, |
|
"logits/rejected": -1.9982340335845947, |
|
"logps/chosen": -1.0210195779800415, |
|
"logps/rejected": -1.1483935117721558, |
|
"loss": 1.6568, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.042039155960083, |
|
"rewards/margins": 0.25474798679351807, |
|
"rewards/rejected": -2.2967870235443115, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.23342939481268, |
|
"grad_norm": 23.10826240141973, |
|
"learning_rate": 9.305301153307949e-09, |
|
"logits/chosen": -2.0031771659851074, |
|
"logits/rejected": -2.0111162662506104, |
|
"logps/chosen": -0.9573473930358887, |
|
"logps/rejected": -1.1340956687927246, |
|
"loss": 1.5703, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9146947860717773, |
|
"rewards/margins": 0.35349661111831665, |
|
"rewards/rejected": -2.268191337585449, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.2406340057636887, |
|
"grad_norm": 22.536059648116893, |
|
"learning_rate": 9.142699728146336e-09, |
|
"logits/chosen": -1.9767051935195923, |
|
"logits/rejected": -1.9701921939849854, |
|
"logps/chosen": -1.040269136428833, |
|
"logps/rejected": -1.1842563152313232, |
|
"loss": 1.6209, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.080538272857666, |
|
"rewards/margins": 0.28797417879104614, |
|
"rewards/rejected": -2.3685126304626465, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.2478386167146973, |
|
"grad_norm": 21.32782505584108, |
|
"learning_rate": 8.981213005715627e-09, |
|
"logits/chosen": -1.9988371133804321, |
|
"logits/rejected": -2.0019822120666504, |
|
"logps/chosen": -1.000981092453003, |
|
"logps/rejected": -1.1834334135055542, |
|
"loss": 1.5615, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.001962184906006, |
|
"rewards/margins": 0.36490458250045776, |
|
"rewards/rejected": -2.3668668270111084, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.255043227665706, |
|
"grad_norm": 26.386255833154394, |
|
"learning_rate": 8.820852337865611e-09, |
|
"logits/chosen": -2.0248231887817383, |
|
"logits/rejected": -2.0212666988372803, |
|
"logps/chosen": -1.0024456977844238, |
|
"logps/rejected": -1.1622812747955322, |
|
"loss": 1.5876, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0048913955688477, |
|
"rewards/margins": 0.3196714520454407, |
|
"rewards/rejected": -2.3245625495910645, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.2622478386167146, |
|
"grad_norm": 21.331730880197714, |
|
"learning_rate": 8.661628997289044e-09, |
|
"logits/chosen": -1.9632513523101807, |
|
"logits/rejected": -1.9592435359954834, |
|
"logps/chosen": -1.0223203897476196, |
|
"logps/rejected": -1.1876834630966187, |
|
"loss": 1.5872, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0446407794952393, |
|
"rewards/margins": 0.3307264447212219, |
|
"rewards/rejected": -2.3753669261932373, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.2694524495677233, |
|
"grad_norm": 19.742128897565276, |
|
"learning_rate": 8.503554176729341e-09, |
|
"logits/chosen": -1.9689233303070068, |
|
"logits/rejected": -1.9675970077514648, |
|
"logps/chosen": -1.0326251983642578, |
|
"logps/rejected": -1.2057253122329712, |
|
"loss": 1.5801, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0652503967285156, |
|
"rewards/margins": 0.34619995951652527, |
|
"rewards/rejected": -2.4114506244659424, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.276657060518732, |
|
"grad_norm": 28.382864078102244, |
|
"learning_rate": 8.346638988193636e-09, |
|
"logits/chosen": -1.9992077350616455, |
|
"logits/rejected": -1.9941097497940063, |
|
"logps/chosen": -0.9314563870429993, |
|
"logps/rejected": -1.094285249710083, |
|
"loss": 1.5924, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.8629127740859985, |
|
"rewards/margins": 0.3256576359272003, |
|
"rewards/rejected": -2.188570499420166, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.2838616714697406, |
|
"grad_norm": 27.633265489696246, |
|
"learning_rate": 8.19089446217176e-09, |
|
"logits/chosen": -1.9723091125488281, |
|
"logits/rejected": -1.9623254537582397, |
|
"logps/chosen": -1.0091623067855835, |
|
"logps/rejected": -1.2127363681793213, |
|
"loss": 1.5228, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.018324613571167, |
|
"rewards/margins": 0.40714770555496216, |
|
"rewards/rejected": -2.4254727363586426, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.2910662824207493, |
|
"grad_norm": 20.132916808178752, |
|
"learning_rate": 8.036331546860777e-09, |
|
"logits/chosen": -1.9806181192398071, |
|
"logits/rejected": -1.9802868366241455, |
|
"logps/chosen": -0.9595286250114441, |
|
"logps/rejected": -1.0544369220733643, |
|
"loss": 1.6841, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.9190572500228882, |
|
"rewards/margins": 0.18981659412384033, |
|
"rewards/rejected": -2.1088738441467285, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.298270893371758, |
|
"grad_norm": 26.876429643756662, |
|
"learning_rate": 7.882961107395416e-09, |
|
"logits/chosen": -1.986196517944336, |
|
"logits/rejected": -1.9807474613189697, |
|
"logps/chosen": -1.1408748626708984, |
|
"logps/rejected": -1.1967402696609497, |
|
"loss": 1.7545, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.281749725341797, |
|
"rewards/margins": 0.11173073202371597, |
|
"rewards/rejected": -2.3934805393218994, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.3054755043227666, |
|
"grad_norm": 30.561781481040974, |
|
"learning_rate": 7.73079392508428e-09, |
|
"logits/chosen": -1.961582899093628, |
|
"logits/rejected": -1.9609956741333008, |
|
"logps/chosen": -1.0990729331970215, |
|
"logps/rejected": -1.302329659461975, |
|
"loss": 1.5607, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.198145866394043, |
|
"rewards/margins": 0.4065133035182953, |
|
"rewards/rejected": -2.60465931892395, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.3126801152737753, |
|
"grad_norm": 26.156879570027353, |
|
"learning_rate": 7.579840696651938e-09, |
|
"logits/chosen": -1.9922736883163452, |
|
"logits/rejected": -1.989356279373169, |
|
"logps/chosen": -1.057533621788025, |
|
"logps/rejected": -1.1920627355575562, |
|
"loss": 1.63, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.11506724357605, |
|
"rewards/margins": 0.2690581679344177, |
|
"rewards/rejected": -2.3841254711151123, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.319884726224784, |
|
"grad_norm": 27.66473676438466, |
|
"learning_rate": 7.43011203348704e-09, |
|
"logits/chosen": -1.9100353717803955, |
|
"logits/rejected": -1.9067933559417725, |
|
"logps/chosen": -1.061374306678772, |
|
"logps/rejected": -1.1472381353378296, |
|
"loss": 1.702, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.122748613357544, |
|
"rewards/margins": 0.17172771692276, |
|
"rewards/rejected": -2.294476270675659, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.3270893371757926, |
|
"grad_norm": 22.452700867913006, |
|
"learning_rate": 7.281618460896344e-09, |
|
"logits/chosen": -1.9864110946655273, |
|
"logits/rejected": -1.9839799404144287, |
|
"logps/chosen": -0.9730486869812012, |
|
"logps/rejected": -1.126781702041626, |
|
"loss": 1.59, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9460973739624023, |
|
"rewards/margins": 0.30746573209762573, |
|
"rewards/rejected": -2.253563404083252, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.3342939481268012, |
|
"grad_norm": 24.011321706312618, |
|
"learning_rate": 7.134370417364849e-09, |
|
"logits/chosen": -1.9599173069000244, |
|
"logits/rejected": -1.9596458673477173, |
|
"logps/chosen": -1.0112863779067993, |
|
"logps/rejected": -1.159676194190979, |
|
"loss": 1.6227, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0225727558135986, |
|
"rewards/margins": 0.29677945375442505, |
|
"rewards/rejected": -2.319352388381958, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.34149855907781, |
|
"grad_norm": 27.917867695174607, |
|
"learning_rate": 6.988378253821981e-09, |
|
"logits/chosen": -1.966631293296814, |
|
"logits/rejected": -1.9656906127929688, |
|
"logps/chosen": -1.0303471088409424, |
|
"logps/rejected": -1.1579290628433228, |
|
"loss": 1.6334, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0606942176818848, |
|
"rewards/margins": 0.2551640570163727, |
|
"rewards/rejected": -2.3158581256866455, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.3487031700288186, |
|
"grad_norm": 24.28298696614824, |
|
"learning_rate": 6.8436522329140186e-09, |
|
"logits/chosen": -1.9699004888534546, |
|
"logits/rejected": -1.9766439199447632, |
|
"logps/chosen": -1.0417693853378296, |
|
"logps/rejected": -1.175391435623169, |
|
"loss": 1.6358, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.083538770675659, |
|
"rewards/margins": 0.2672441601753235, |
|
"rewards/rejected": -2.350782871246338, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.3559077809798272, |
|
"grad_norm": 26.189687917994434, |
|
"learning_rate": 6.700202528282603e-09, |
|
"logits/chosen": -1.9680284261703491, |
|
"logits/rejected": -1.9585533142089844, |
|
"logps/chosen": -1.0375484228134155, |
|
"logps/rejected": -1.165076494216919, |
|
"loss": 1.6363, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.075096845626831, |
|
"rewards/margins": 0.25505581498146057, |
|
"rewards/rejected": -2.330152988433838, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.363112391930836, |
|
"grad_norm": 27.52676939865246, |
|
"learning_rate": 6.558039223849668e-09, |
|
"logits/chosen": -2.0244338512420654, |
|
"logits/rejected": -2.014971971511841, |
|
"logps/chosen": -1.044699788093567, |
|
"logps/rejected": -1.2688946723937988, |
|
"loss": 1.5174, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.089399576187134, |
|
"rewards/margins": 0.4483897089958191, |
|
"rewards/rejected": -2.5377893447875977, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.3703170028818445, |
|
"grad_norm": 26.223585423550503, |
|
"learning_rate": 6.417172313108471e-09, |
|
"logits/chosen": -1.9503448009490967, |
|
"logits/rejected": -1.9449748992919922, |
|
"logps/chosen": -0.9975979924201965, |
|
"logps/rejected": -1.136448860168457, |
|
"loss": 1.6211, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.995195984840393, |
|
"rewards/margins": 0.2777020335197449, |
|
"rewards/rejected": -2.272897720336914, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.377521613832853, |
|
"grad_norm": 25.653460005188553, |
|
"learning_rate": 6.277611698421179e-09, |
|
"logits/chosen": -2.0117154121398926, |
|
"logits/rejected": -2.003696918487549, |
|
"logps/chosen": -0.9123650789260864, |
|
"logps/rejected": -1.1209781169891357, |
|
"loss": 1.5311, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8247301578521729, |
|
"rewards/margins": 0.4172258973121643, |
|
"rewards/rejected": -2.2419562339782715, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.3847262247838614, |
|
"grad_norm": 27.5322217958583, |
|
"learning_rate": 6.139367190322714e-09, |
|
"logits/chosen": -2.0015804767608643, |
|
"logits/rejected": -2.0014004707336426, |
|
"logps/chosen": -1.0666191577911377, |
|
"logps/rejected": -1.2348202466964722, |
|
"loss": 1.579, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1332383155822754, |
|
"rewards/margins": 0.3364020586013794, |
|
"rewards/rejected": -2.4696404933929443, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.39193083573487, |
|
"grad_norm": 19.961391280944266, |
|
"learning_rate": 6.002448506831171e-09, |
|
"logits/chosen": -1.9937254190444946, |
|
"logits/rejected": -1.9888588190078735, |
|
"logps/chosen": -0.9895680546760559, |
|
"logps/rejected": -1.1448198556900024, |
|
"loss": 1.5893, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9791361093521118, |
|
"rewards/margins": 0.31050366163253784, |
|
"rewards/rejected": -2.289639711380005, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.3991354466858787, |
|
"grad_norm": 21.81418154842677, |
|
"learning_rate": 5.866865272764607e-09, |
|
"logits/chosen": -2.01529598236084, |
|
"logits/rejected": -2.015200138092041, |
|
"logps/chosen": -1.0282292366027832, |
|
"logps/rejected": -1.182253360748291, |
|
"loss": 1.5975, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0564584732055664, |
|
"rewards/margins": 0.3080483078956604, |
|
"rewards/rejected": -2.364506721496582, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.4063400576368874, |
|
"grad_norm": 28.637110711539222, |
|
"learning_rate": 5.7326270190645595e-09, |
|
"logits/chosen": -1.8906781673431396, |
|
"logits/rejected": -1.8924648761749268, |
|
"logps/chosen": -1.0696254968643188, |
|
"logps/rejected": -1.1889064311981201, |
|
"loss": 1.6439, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1392509937286377, |
|
"rewards/margins": 0.23856201767921448, |
|
"rewards/rejected": -2.3778128623962402, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.413544668587896, |
|
"grad_norm": 21.957493188360655, |
|
"learning_rate": 5.599743182125938e-09, |
|
"logits/chosen": -2.0409464836120605, |
|
"logits/rejected": -2.0412380695343018, |
|
"logps/chosen": -1.0565509796142578, |
|
"logps/rejected": -1.20218825340271, |
|
"loss": 1.6005, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1131019592285156, |
|
"rewards/margins": 0.2912743389606476, |
|
"rewards/rejected": -2.40437650680542, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.4207492795389047, |
|
"grad_norm": 24.603451670604123, |
|
"learning_rate": 5.46822310313379e-09, |
|
"logits/chosen": -2.0415310859680176, |
|
"logits/rejected": -2.0513522624969482, |
|
"logps/chosen": -1.1003459692001343, |
|
"logps/rejected": -1.2101898193359375, |
|
"loss": 1.6691, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.2006919384002686, |
|
"rewards/margins": 0.21968770027160645, |
|
"rewards/rejected": -2.420379638671875, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.4279538904899134, |
|
"grad_norm": 24.665272996536196, |
|
"learning_rate": 5.33807602740658e-09, |
|
"logits/chosen": -2.0148816108703613, |
|
"logits/rejected": -2.008512258529663, |
|
"logps/chosen": -0.9646922945976257, |
|
"logps/rejected": -1.1866120100021362, |
|
"loss": 1.5074, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.9293845891952515, |
|
"rewards/margins": 0.44383955001831055, |
|
"rewards/rejected": -2.3732240200042725, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.435158501440922, |
|
"grad_norm": 24.796160501653265, |
|
"learning_rate": 5.209311103746334e-09, |
|
"logits/chosen": -1.9915498495101929, |
|
"logits/rejected": -1.9920551776885986, |
|
"logps/chosen": -1.060661792755127, |
|
"logps/rejected": -1.2435554265975952, |
|
"loss": 1.5707, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.121323585510254, |
|
"rewards/margins": 0.3657872676849365, |
|
"rewards/rejected": -2.4871108531951904, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.4423631123919307, |
|
"grad_norm": 29.011430928388492, |
|
"learning_rate": 5.081937383795484e-09, |
|
"logits/chosen": -1.9661788940429688, |
|
"logits/rejected": -1.9654123783111572, |
|
"logps/chosen": -0.9790254831314087, |
|
"logps/rejected": -1.157768964767456, |
|
"loss": 1.5575, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9580509662628174, |
|
"rewards/margins": 0.35748690366744995, |
|
"rewards/rejected": -2.315537929534912, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.4495677233429394, |
|
"grad_norm": 22.287201293983628, |
|
"learning_rate": 4.955963821400599e-09, |
|
"logits/chosen": -2.015843152999878, |
|
"logits/rejected": -2.010300397872925, |
|
"logps/chosen": -1.03963041305542, |
|
"logps/rejected": -1.1898655891418457, |
|
"loss": 1.6122, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.07926082611084, |
|
"rewards/margins": 0.30047017335891724, |
|
"rewards/rejected": -2.3797311782836914, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.456772334293948, |
|
"grad_norm": 18.680236697184657, |
|
"learning_rate": 4.831399271982928e-09, |
|
"logits/chosen": -1.9459985494613647, |
|
"logits/rejected": -1.9379857778549194, |
|
"logps/chosen": -1.0498692989349365, |
|
"logps/rejected": -1.19268000125885, |
|
"loss": 1.631, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.099738597869873, |
|
"rewards/margins": 0.2856215536594391, |
|
"rewards/rejected": -2.3853600025177, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.4639769452449567, |
|
"grad_norm": 30.249393666448327, |
|
"learning_rate": 4.708252491915951e-09, |
|
"logits/chosen": -2.021432399749756, |
|
"logits/rejected": -2.015430450439453, |
|
"logps/chosen": -1.0544407367706299, |
|
"logps/rejected": -1.2139813899993896, |
|
"loss": 1.6108, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1088814735412598, |
|
"rewards/margins": 0.31908124685287476, |
|
"rewards/rejected": -2.4279627799987793, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.4711815561959654, |
|
"grad_norm": 29.931099930216636, |
|
"learning_rate": 4.58653213790981e-09, |
|
"logits/chosen": -2.000617265701294, |
|
"logits/rejected": -1.993115782737732, |
|
"logps/chosen": -1.0341386795043945, |
|
"logps/rejected": -1.194247841835022, |
|
"loss": 1.5948, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.068277359008789, |
|
"rewards/margins": 0.32021820545196533, |
|
"rewards/rejected": -2.388495683670044, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.478386167146974, |
|
"grad_norm": 22.79733837458826, |
|
"learning_rate": 4.466246766402773e-09, |
|
"logits/chosen": -1.9823423624038696, |
|
"logits/rejected": -1.9762611389160156, |
|
"logps/chosen": -1.047199010848999, |
|
"logps/rejected": -1.2147526741027832, |
|
"loss": 1.5942, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.094398021697998, |
|
"rewards/margins": 0.33510738611221313, |
|
"rewards/rejected": -2.4295053482055664, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.4855907780979827, |
|
"grad_norm": 27.618677755187758, |
|
"learning_rate": 4.347404832959775e-09, |
|
"logits/chosen": -2.0297904014587402, |
|
"logits/rejected": -2.0301997661590576, |
|
"logps/chosen": -1.0434116125106812, |
|
"logps/rejected": -1.2150681018829346, |
|
"loss": 1.5743, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0868232250213623, |
|
"rewards/margins": 0.3433128595352173, |
|
"rewards/rejected": -2.430136203765869, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.4927953890489913, |
|
"grad_norm": 37.06042245982399, |
|
"learning_rate": 4.230014691678016e-09, |
|
"logits/chosen": -1.9870338439941406, |
|
"logits/rejected": -1.9875684976577759, |
|
"logps/chosen": -1.070052146911621, |
|
"logps/rejected": -1.1430634260177612, |
|
"loss": 1.7119, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.140104293823242, |
|
"rewards/margins": 0.14602291584014893, |
|
"rewards/rejected": -2.2861268520355225, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 21.559521464152546, |
|
"learning_rate": 4.114084594599707e-09, |
|
"logits/chosen": -1.9837043285369873, |
|
"logits/rejected": -1.9837700128555298, |
|
"logps/chosen": -1.0196425914764404, |
|
"logps/rejected": -1.254660725593567, |
|
"loss": 1.4967, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.039285182952881, |
|
"rewards/margins": 0.47003644704818726, |
|
"rewards/rejected": -2.509321451187134, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.5072046109510087, |
|
"grad_norm": 25.359081837639064, |
|
"learning_rate": 3.9996226911319546e-09, |
|
"logits/chosen": -1.9840672016143799, |
|
"logits/rejected": -1.9717687368392944, |
|
"logps/chosen": -1.0251834392547607, |
|
"logps/rejected": -1.1647298336029053, |
|
"loss": 1.6113, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0503668785095215, |
|
"rewards/margins": 0.2790928781032562, |
|
"rewards/rejected": -2.3294596672058105, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.5144092219020173, |
|
"grad_norm": 23.21571489418676, |
|
"learning_rate": 3.886637027473949e-09, |
|
"logits/chosen": -1.9924989938735962, |
|
"logits/rejected": -1.9947795867919922, |
|
"logps/chosen": -1.085985779762268, |
|
"logps/rejected": -1.2627681493759155, |
|
"loss": 1.5681, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.171971559524536, |
|
"rewards/margins": 0.35356515645980835, |
|
"rewards/rejected": -2.525536298751831, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.521613832853026, |
|
"grad_norm": 23.50855743390047, |
|
"learning_rate": 3.775135546051295e-09, |
|
"logits/chosen": -1.9302011728286743, |
|
"logits/rejected": -1.9312467575073242, |
|
"logps/chosen": -1.0346893072128296, |
|
"logps/rejected": -1.173813819885254, |
|
"loss": 1.6196, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.069378614425659, |
|
"rewards/margins": 0.27824902534484863, |
|
"rewards/rejected": -2.347627639770508, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.5288184438040346, |
|
"grad_norm": 29.164941549948743, |
|
"learning_rate": 3.665126084957723e-09, |
|
"logits/chosen": -1.9749339818954468, |
|
"logits/rejected": -1.9792273044586182, |
|
"logps/chosen": -1.1444737911224365, |
|
"logps/rejected": -1.2538254261016846, |
|
"loss": 1.6869, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.288947582244873, |
|
"rewards/margins": 0.218703031539917, |
|
"rewards/rejected": -2.507650852203369, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.5360230547550433, |
|
"grad_norm": 24.755536040730522, |
|
"learning_rate": 3.556616377404101e-09, |
|
"logits/chosen": -2.0026092529296875, |
|
"logits/rejected": -2.0009195804595947, |
|
"logps/chosen": -1.090807557106018, |
|
"logps/rejected": -1.2610208988189697, |
|
"loss": 1.5663, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.181615114212036, |
|
"rewards/margins": 0.34042656421661377, |
|
"rewards/rejected": -2.5220417976379395, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.543227665706052, |
|
"grad_norm": 24.281136701238548, |
|
"learning_rate": 3.4496140511748125e-09, |
|
"logits/chosen": -1.9856958389282227, |
|
"logits/rejected": -1.98044753074646, |
|
"logps/chosen": -1.0673859119415283, |
|
"logps/rejected": -1.222153663635254, |
|
"loss": 1.5935, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1347718238830566, |
|
"rewards/margins": 0.30953553318977356, |
|
"rewards/rejected": -2.444307327270508, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.5504322766570606, |
|
"grad_norm": 36.508959716968505, |
|
"learning_rate": 3.3441266280915427e-09, |
|
"logits/chosen": -1.9778625965118408, |
|
"logits/rejected": -1.978600263595581, |
|
"logps/chosen": -1.1016578674316406, |
|
"logps/rejected": -1.225250005722046, |
|
"loss": 1.6389, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.2033157348632812, |
|
"rewards/margins": 0.24718408286571503, |
|
"rewards/rejected": -2.450500011444092, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.5576368876080693, |
|
"grad_norm": 28.5877047659264, |
|
"learning_rate": 3.2401615234845693e-09, |
|
"logits/chosen": -2.0043270587921143, |
|
"logits/rejected": -1.9988540410995483, |
|
"logps/chosen": -1.10341477394104, |
|
"logps/rejected": -1.2606594562530518, |
|
"loss": 1.6032, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.20682954788208, |
|
"rewards/margins": 0.31448912620544434, |
|
"rewards/rejected": -2.5213189125061035, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.564841498559078, |
|
"grad_norm": 20.7773010532923, |
|
"learning_rate": 3.1377260456714375e-09, |
|
"logits/chosen": -1.8968908786773682, |
|
"logits/rejected": -1.8885040283203125, |
|
"logps/chosen": -1.0720723867416382, |
|
"logps/rejected": -1.2278337478637695, |
|
"loss": 1.5831, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1441447734832764, |
|
"rewards/margins": 0.3115227222442627, |
|
"rewards/rejected": -2.455667495727539, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.5720461095100866, |
|
"grad_norm": 21.319035140390216, |
|
"learning_rate": 3.0368273954432698e-09, |
|
"logits/chosen": -2.021033525466919, |
|
"logits/rejected": -2.012899398803711, |
|
"logps/chosen": -1.059136152267456, |
|
"logps/rejected": -1.1730144023895264, |
|
"loss": 1.6541, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.118272304534912, |
|
"rewards/margins": 0.22775661945343018, |
|
"rewards/rejected": -2.3460288047790527, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.5792507204610953, |
|
"grad_norm": 21.491101038274447, |
|
"learning_rate": 2.937472665558541e-09, |
|
"logits/chosen": -2.019315242767334, |
|
"logits/rejected": -2.020840644836426, |
|
"logps/chosen": -1.0477993488311768, |
|
"logps/rejected": -1.1753723621368408, |
|
"loss": 1.6434, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0955986976623535, |
|
"rewards/margins": 0.255145788192749, |
|
"rewards/rejected": -2.3507447242736816, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.586455331412104, |
|
"grad_norm": 25.855015008453492, |
|
"learning_rate": 2.8396688402445053e-09, |
|
"logits/chosen": -2.0597152709960938, |
|
"logits/rejected": -2.05248761177063, |
|
"logps/chosen": -1.0254089832305908, |
|
"logps/rejected": -1.2497340440750122, |
|
"loss": 1.5075, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0508179664611816, |
|
"rewards/margins": 0.44864988327026367, |
|
"rewards/rejected": -2.4994680881500244, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.5936599423631126, |
|
"grad_norm": 27.89177787255965, |
|
"learning_rate": 2.7434227947062324e-09, |
|
"logits/chosen": -1.9996436834335327, |
|
"logits/rejected": -1.9933786392211914, |
|
"logps/chosen": -1.1413462162017822, |
|
"logps/rejected": -1.2593032121658325, |
|
"loss": 1.658, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.2826924324035645, |
|
"rewards/margins": 0.23591408133506775, |
|
"rewards/rejected": -2.518606424331665, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.6008645533141213, |
|
"grad_norm": 21.719736867269187, |
|
"learning_rate": 2.6487412946432976e-09, |
|
"logits/chosen": -1.9642817974090576, |
|
"logits/rejected": -1.9591954946517944, |
|
"logps/chosen": -1.0852771997451782, |
|
"logps/rejected": -1.2340834140777588, |
|
"loss": 1.609, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1705543994903564, |
|
"rewards/margins": 0.29761233925819397, |
|
"rewards/rejected": -2.4681668281555176, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.60806916426513, |
|
"grad_norm": 27.728503047567223, |
|
"learning_rate": 2.5556309957742024e-09, |
|
"logits/chosen": -1.9747917652130127, |
|
"logits/rejected": -1.969839096069336, |
|
"logps/chosen": -1.0310755968093872, |
|
"logps/rejected": -1.242746353149414, |
|
"loss": 1.5168, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0621511936187744, |
|
"rewards/margins": 0.42334166169166565, |
|
"rewards/rejected": -2.485492706298828, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.6152737752161386, |
|
"grad_norm": 27.86461777769578, |
|
"learning_rate": 2.4640984433684758e-09, |
|
"logits/chosen": -2.0306482315063477, |
|
"logits/rejected": -2.0319063663482666, |
|
"logps/chosen": -1.1287659406661987, |
|
"logps/rejected": -1.2553170919418335, |
|
"loss": 1.6581, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.2575318813323975, |
|
"rewards/margins": 0.2531023621559143, |
|
"rewards/rejected": -2.510634183883667, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.6224783861671472, |
|
"grad_norm": 22.019685163280375, |
|
"learning_rate": 2.3741500717865987e-09, |
|
"logits/chosen": -1.9839012622833252, |
|
"logits/rejected": -1.9948337078094482, |
|
"logps/chosen": -1.0139714479446411, |
|
"logps/rejected": -1.170982003211975, |
|
"loss": 1.5951, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0279428958892822, |
|
"rewards/margins": 0.3140210211277008, |
|
"rewards/rejected": -2.34196400642395, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.629682997118156, |
|
"grad_norm": 22.05753538801505, |
|
"learning_rate": 2.285792204027678e-09, |
|
"logits/chosen": -1.9756708145141602, |
|
"logits/rejected": -1.9732259511947632, |
|
"logps/chosen": -1.0228625535964966, |
|
"logps/rejected": -1.2338178157806396, |
|
"loss": 1.5053, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.045725107192993, |
|
"rewards/margins": 0.4219103455543518, |
|
"rewards/rejected": -2.4676356315612793, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.636887608069164, |
|
"grad_norm": 25.411892862416398, |
|
"learning_rate": 2.199031051284972e-09, |
|
"logits/chosen": -1.9979976415634155, |
|
"logits/rejected": -1.9935623407363892, |
|
"logps/chosen": -1.080296277999878, |
|
"logps/rejected": -1.2212116718292236, |
|
"loss": 1.634, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.160592555999756, |
|
"rewards/margins": 0.28183117508888245, |
|
"rewards/rejected": -2.4424233436584473, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.6440922190201728, |
|
"grad_norm": 21.595154576529936, |
|
"learning_rate": 2.113872712509254e-09, |
|
"logits/chosen": -1.9839746952056885, |
|
"logits/rejected": -1.9765218496322632, |
|
"logps/chosen": -1.141282558441162, |
|
"logps/rejected": -1.2657949924468994, |
|
"loss": 1.6491, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.282565116882324, |
|
"rewards/margins": 0.2490251064300537, |
|
"rewards/rejected": -2.531589984893799, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.6512968299711814, |
|
"grad_norm": 17.436350715731248, |
|
"learning_rate": 2.0303231739801143e-09, |
|
"logits/chosen": -1.9631779193878174, |
|
"logits/rejected": -1.9526808261871338, |
|
"logps/chosen": -1.030444860458374, |
|
"logps/rejected": -1.1799728870391846, |
|
"loss": 1.6027, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.060889720916748, |
|
"rewards/margins": 0.2990562319755554, |
|
"rewards/rejected": -2.359945774078369, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.65850144092219, |
|
"grad_norm": 28.26132547261304, |
|
"learning_rate": 1.948388308885102e-09, |
|
"logits/chosen": -2.030214786529541, |
|
"logits/rejected": -2.021780490875244, |
|
"logps/chosen": -1.0733340978622437, |
|
"logps/rejected": -1.192913293838501, |
|
"loss": 1.6427, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1466681957244873, |
|
"rewards/margins": 0.23915806412696838, |
|
"rewards/rejected": -2.385826587677002, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.6657060518731988, |
|
"grad_norm": 31.37293000800097, |
|
"learning_rate": 1.86807387690692e-09, |
|
"logits/chosen": -2.056385040283203, |
|
"logits/rejected": -2.0531296730041504, |
|
"logps/chosen": -1.0972204208374023, |
|
"logps/rejected": -1.3030415773391724, |
|
"loss": 1.5153, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1944408416748047, |
|
"rewards/margins": 0.4116426110267639, |
|
"rewards/rejected": -2.6060831546783447, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.6729106628242074, |
|
"grad_norm": 24.09942757976874, |
|
"learning_rate": 1.789385523818493e-09, |
|
"logits/chosen": -2.0195093154907227, |
|
"logits/rejected": -2.0211291313171387, |
|
"logps/chosen": -1.0491076707839966, |
|
"logps/rejected": -1.2321463823318481, |
|
"loss": 1.5545, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.098215341567993, |
|
"rewards/margins": 0.3660774827003479, |
|
"rewards/rejected": -2.4642927646636963, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.680115273775216, |
|
"grad_norm": 30.753165172362362, |
|
"learning_rate": 1.712328781086131e-09, |
|
"logits/chosen": -2.0431981086730957, |
|
"logits/rejected": -2.038255214691162, |
|
"logps/chosen": -1.1346651315689087, |
|
"logps/rejected": -1.2389873266220093, |
|
"loss": 1.6718, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.2693302631378174, |
|
"rewards/margins": 0.20864447951316833, |
|
"rewards/rejected": -2.4779746532440186, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.6873198847262247, |
|
"grad_norm": 25.62420725486474, |
|
"learning_rate": 1.6369090654806543e-09, |
|
"logits/chosen": -2.04803466796875, |
|
"logits/rejected": -2.041686534881592, |
|
"logps/chosen": -1.0324945449829102, |
|
"logps/rejected": -1.1873632669448853, |
|
"loss": 1.5862, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0649890899658203, |
|
"rewards/margins": 0.30973726511001587, |
|
"rewards/rejected": -2.3747265338897705, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.6945244956772334, |
|
"grad_norm": 23.41499250305915, |
|
"learning_rate": 1.5631316786966498e-09, |
|
"logits/chosen": -1.975886583328247, |
|
"logits/rejected": -1.969620704650879, |
|
"logps/chosen": -1.03388512134552, |
|
"logps/rejected": -1.184274673461914, |
|
"loss": 1.6153, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.06777024269104, |
|
"rewards/margins": 0.3007793426513672, |
|
"rewards/rejected": -2.368549346923828, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.701729106628242, |
|
"grad_norm": 22.50489621056607, |
|
"learning_rate": 1.491001806979772e-09, |
|
"logits/chosen": -2.0254969596862793, |
|
"logits/rejected": -2.0186665058135986, |
|
"logps/chosen": -1.0850400924682617, |
|
"logps/rejected": -1.246293067932129, |
|
"loss": 1.5906, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1700801849365234, |
|
"rewards/margins": 0.3225058913230896, |
|
"rewards/rejected": -2.492586135864258, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.7089337175792507, |
|
"grad_norm": 33.572444771947154, |
|
"learning_rate": 1.4205245207621508e-09, |
|
"logits/chosen": -1.9717979431152344, |
|
"logits/rejected": -1.9695411920547485, |
|
"logps/chosen": -1.1266522407531738, |
|
"logps/rejected": -1.310119390487671, |
|
"loss": 1.5624, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.2533044815063477, |
|
"rewards/margins": 0.3669341802597046, |
|
"rewards/rejected": -2.620238780975342, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.7161383285302594, |
|
"grad_norm": 22.152945911675502, |
|
"learning_rate": 1.3517047743059978e-09, |
|
"logits/chosen": -2.009321928024292, |
|
"logits/rejected": -2.012770891189575, |
|
"logps/chosen": -1.08241605758667, |
|
"logps/rejected": -1.2561628818511963, |
|
"loss": 1.5653, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.16483211517334, |
|
"rewards/margins": 0.34749364852905273, |
|
"rewards/rejected": -2.5123257637023926, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.723342939481268, |
|
"grad_norm": 20.70722793532355, |
|
"learning_rate": 1.2845474053553156e-09, |
|
"logits/chosen": -2.006065845489502, |
|
"logits/rejected": -2.0020499229431152, |
|
"logps/chosen": -1.0431503057479858, |
|
"logps/rejected": -1.1901763677597046, |
|
"loss": 1.623, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0863006114959717, |
|
"rewards/margins": 0.29405173659324646, |
|
"rewards/rejected": -2.380352735519409, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.7305475504322767, |
|
"grad_norm": 26.64826043638499, |
|
"learning_rate": 1.2190571347958422e-09, |
|
"logits/chosen": -2.0325839519500732, |
|
"logits/rejected": -2.033907413482666, |
|
"logps/chosen": -0.9730801582336426, |
|
"logps/rejected": -1.1865020990371704, |
|
"loss": 1.5144, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9461603164672852, |
|
"rewards/margins": 0.42684406042099, |
|
"rewards/rejected": -2.373004198074341, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.7377521613832854, |
|
"grad_norm": 21.63344100089033, |
|
"learning_rate": 1.1552385663231634e-09, |
|
"logits/chosen": -1.9884811639785767, |
|
"logits/rejected": -1.9790737628936768, |
|
"logps/chosen": -1.1036136150360107, |
|
"logps/rejected": -1.2067337036132812, |
|
"loss": 1.6742, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.2072272300720215, |
|
"rewards/margins": 0.20624017715454102, |
|
"rewards/rejected": -2.4134674072265625, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.744956772334294, |
|
"grad_norm": 23.102000729253543, |
|
"learning_rate": 1.0930961861191302e-09, |
|
"logits/chosen": -1.9492992162704468, |
|
"logits/rejected": -1.95420241355896, |
|
"logps/chosen": -1.0491759777069092, |
|
"logps/rejected": -1.2016587257385254, |
|
"loss": 1.6216, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0983519554138184, |
|
"rewards/margins": 0.30496540665626526, |
|
"rewards/rejected": -2.403317451477051, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.7521613832853027, |
|
"grad_norm": 20.452340499679426, |
|
"learning_rate": 1.0326343625364608e-09, |
|
"logits/chosen": -1.9641183614730835, |
|
"logits/rejected": -1.959027886390686, |
|
"logps/chosen": -1.051187515258789, |
|
"logps/rejected": -1.2369552850723267, |
|
"loss": 1.5457, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.102375030517578, |
|
"rewards/margins": 0.3715355098247528, |
|
"rewards/rejected": -2.4739105701446533, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.7593659942363113, |
|
"grad_norm": 21.613705313436856, |
|
"learning_rate": 9.738573457917066e-10, |
|
"logits/chosen": -2.0394294261932373, |
|
"logits/rejected": -2.038104772567749, |
|
"logps/chosen": -1.060562252998352, |
|
"logps/rejected": -1.2662984132766724, |
|
"loss": 1.5136, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.121124505996704, |
|
"rewards/margins": 0.4114726185798645, |
|
"rewards/rejected": -2.5325968265533447, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.76657060518732, |
|
"grad_norm": 24.920928975195235, |
|
"learning_rate": 9.16769267666434e-10, |
|
"logits/chosen": -2.002856731414795, |
|
"logits/rejected": -2.000764846801758, |
|
"logps/chosen": -1.084855318069458, |
|
"logps/rejected": -1.165999174118042, |
|
"loss": 1.7045, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.169710636138916, |
|
"rewards/margins": 0.16228748857975006, |
|
"rewards/rejected": -2.331998348236084, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.7737752161383287, |
|
"grad_norm": 25.07731794836384, |
|
"learning_rate": 8.613741412168113e-10, |
|
"logits/chosen": -2.019688129425049, |
|
"logits/rejected": -2.0192506313323975, |
|
"logps/chosen": -1.0898449420928955, |
|
"logps/rejected": -1.2301945686340332, |
|
"loss": 1.6, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.179689884185791, |
|
"rewards/margins": 0.2806992828845978, |
|
"rewards/rejected": -2.4603891372680664, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.7809798270893373, |
|
"grad_norm": 24.28344164460677, |
|
"learning_rate": 8.076758604914802e-10, |
|
"logits/chosen": -1.9492765665054321, |
|
"logits/rejected": -1.9450442790985107, |
|
"logps/chosen": -0.9892587661743164, |
|
"logps/rejected": -1.1346309185028076, |
|
"loss": 1.6167, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.9785175323486328, |
|
"rewards/margins": 0.29074448347091675, |
|
"rewards/rejected": -2.2692618370056152, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.7881844380403455, |
|
"grad_norm": 28.329942268146258, |
|
"learning_rate": 7.55678200257856e-10, |
|
"logits/chosen": -1.9771426916122437, |
|
"logits/rejected": -1.9707549810409546, |
|
"logps/chosen": -1.0437920093536377, |
|
"logps/rejected": -1.200476884841919, |
|
"loss": 1.5889, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0875840187072754, |
|
"rewards/margins": 0.3133697509765625, |
|
"rewards/rejected": -2.400953769683838, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.795389048991354, |
|
"grad_norm": 20.835071031526898, |
|
"learning_rate": 7.053848157367315e-10, |
|
"logits/chosen": -1.991943597793579, |
|
"logits/rejected": -1.9867950677871704, |
|
"logps/chosen": -1.0485520362854004, |
|
"logps/rejected": -1.2103912830352783, |
|
"loss": 1.5969, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.097104072570801, |
|
"rewards/margins": 0.3236783444881439, |
|
"rewards/rejected": -2.4207825660705566, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.802593659942363, |
|
"grad_norm": 19.61293690218021, |
|
"learning_rate": 6.567992423453794e-10, |
|
"logits/chosen": -2.0088655948638916, |
|
"logits/rejected": -2.007509708404541, |
|
"logps/chosen": -0.9718767404556274, |
|
"logps/rejected": -1.1004300117492676, |
|
"loss": 1.6236, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9437534809112549, |
|
"rewards/margins": 0.2571064531803131, |
|
"rewards/rejected": -2.200860023498535, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.8097982708933715, |
|
"grad_norm": 23.907550449023642, |
|
"learning_rate": 6.099248954489794e-10, |
|
"logits/chosen": -1.9489400386810303, |
|
"logits/rejected": -1.9466136693954468, |
|
"logps/chosen": -1.0784661769866943, |
|
"logps/rejected": -1.2528491020202637, |
|
"loss": 1.5667, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1569323539733887, |
|
"rewards/margins": 0.34876567125320435, |
|
"rewards/rejected": -2.5056982040405273, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.81700288184438, |
|
"grad_norm": 28.578873657679054, |
|
"learning_rate": 5.647650701205653e-10, |
|
"logits/chosen": -2.0213611125946045, |
|
"logits/rejected": -2.0133721828460693, |
|
"logps/chosen": -1.1192106008529663, |
|
"logps/rejected": -1.291589617729187, |
|
"loss": 1.5866, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2384212017059326, |
|
"rewards/margins": 0.34475821256637573, |
|
"rewards/rejected": -2.583179235458374, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.824207492795389, |
|
"grad_norm": 19.41067753405638, |
|
"learning_rate": 5.213229409093856e-10, |
|
"logits/chosen": -2.0236122608184814, |
|
"logits/rejected": -2.018137216567993, |
|
"logps/chosen": -1.0645720958709717, |
|
"logps/rejected": -1.2068941593170166, |
|
"loss": 1.6192, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1291441917419434, |
|
"rewards/margins": 0.2846437692642212, |
|
"rewards/rejected": -2.413788318634033, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.8314121037463975, |
|
"grad_norm": 25.489467678642576, |
|
"learning_rate": 4.796015616177401e-10, |
|
"logits/chosen": -1.9871950149536133, |
|
"logits/rejected": -1.9816499948501587, |
|
"logps/chosen": -1.0776426792144775, |
|
"logps/rejected": -1.1982210874557495, |
|
"loss": 1.6416, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.155285358428955, |
|
"rewards/margins": 0.24115705490112305, |
|
"rewards/rejected": -2.396442174911499, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.838616714697406, |
|
"grad_norm": 20.30364693658417, |
|
"learning_rate": 4.3960386508631595e-10, |
|
"logits/chosen": -1.921939492225647, |
|
"logits/rejected": -1.9144452810287476, |
|
"logps/chosen": -0.979306697845459, |
|
"logps/rejected": -1.107569694519043, |
|
"loss": 1.6521, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.958613395690918, |
|
"rewards/margins": 0.2565256953239441, |
|
"rewards/rejected": -2.215139389038086, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.845821325648415, |
|
"grad_norm": 42.519384522550766, |
|
"learning_rate": 4.013326629880243e-10, |
|
"logits/chosen": -1.9709304571151733, |
|
"logits/rejected": -1.961439847946167, |
|
"logps/chosen": -1.1202346086502075, |
|
"logps/rejected": -1.2615710496902466, |
|
"loss": 1.622, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.240469217300415, |
|
"rewards/margins": 0.2826729416847229, |
|
"rewards/rejected": -2.523142099380493, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.8530259365994235, |
|
"grad_norm": 23.238678680474873, |
|
"learning_rate": 3.64790645630339e-10, |
|
"logits/chosen": -1.9292995929718018, |
|
"logits/rejected": -1.9286911487579346, |
|
"logps/chosen": -1.061901330947876, |
|
"logps/rejected": -1.138238787651062, |
|
"loss": 1.7022, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.123802661895752, |
|
"rewards/margins": 0.1526748687028885, |
|
"rewards/rejected": -2.276477575302124, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.860230547550432, |
|
"grad_norm": 25.943257260205577, |
|
"learning_rate": 3.2998038176619e-10, |
|
"logits/chosen": -1.973475694656372, |
|
"logits/rejected": -1.9651120901107788, |
|
"logps/chosen": -1.0681226253509521, |
|
"logps/rejected": -1.200909972190857, |
|
"loss": 1.6307, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.1362452507019043, |
|
"rewards/margins": 0.2655748426914215, |
|
"rewards/rejected": -2.401819944381714, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.867435158501441, |
|
"grad_norm": 25.046112858885824, |
|
"learning_rate": 2.969043184133907e-10, |
|
"logits/chosen": -2.0394976139068604, |
|
"logits/rejected": -2.0383353233337402, |
|
"logps/chosen": -0.9778817296028137, |
|
"logps/rejected": -1.2098453044891357, |
|
"loss": 1.4747, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9557634592056274, |
|
"rewards/margins": 0.46392711997032166, |
|
"rewards/rejected": -2.4196906089782715, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.8746397694524495, |
|
"grad_norm": 22.389990606212322, |
|
"learning_rate": 2.6556478068261447e-10, |
|
"logits/chosen": -1.964787244796753, |
|
"logits/rejected": -1.9624879360198975, |
|
"logps/chosen": -0.9804242849349976, |
|
"logps/rejected": -1.1250919103622437, |
|
"loss": 1.6234, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9608485698699951, |
|
"rewards/margins": 0.28933483362197876, |
|
"rewards/rejected": -2.2501838207244873, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.881844380403458, |
|
"grad_norm": 24.0117386682406, |
|
"learning_rate": 2.3596397161395607e-10, |
|
"logits/chosen": -2.0428566932678223, |
|
"logits/rejected": -2.031513214111328, |
|
"logps/chosen": -1.0748224258422852, |
|
"logps/rejected": -1.255406141281128, |
|
"loss": 1.5662, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1496448516845703, |
|
"rewards/margins": 0.36116713285446167, |
|
"rewards/rejected": -2.510812282562256, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.889048991354467, |
|
"grad_norm": 32.362804953121234, |
|
"learning_rate": 2.0810397202206399e-10, |
|
"logits/chosen": -1.9443575143814087, |
|
"logits/rejected": -1.9498094320297241, |
|
"logps/chosen": -1.0714243650436401, |
|
"logps/rejected": -1.2111313343048096, |
|
"loss": 1.6118, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.1428487300872803, |
|
"rewards/margins": 0.27941370010375977, |
|
"rewards/rejected": -2.422262668609619, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.8962536023054755, |
|
"grad_norm": 27.023698297841868, |
|
"learning_rate": 1.819867403498737e-10, |
|
"logits/chosen": -2.03318452835083, |
|
"logits/rejected": -2.0310425758361816, |
|
"logps/chosen": -1.0808742046356201, |
|
"logps/rejected": -1.2229337692260742, |
|
"loss": 1.6227, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1617484092712402, |
|
"rewards/margins": 0.284119576215744, |
|
"rewards/rejected": -2.4458675384521484, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.903458213256484, |
|
"grad_norm": 26.71011918530369, |
|
"learning_rate": 1.5761411253092382e-10, |
|
"logits/chosen": -1.9595407247543335, |
|
"logits/rejected": -1.9497658014297485, |
|
"logps/chosen": -0.9974473714828491, |
|
"logps/rejected": -1.1304714679718018, |
|
"loss": 1.6212, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9948947429656982, |
|
"rewards/margins": 0.2660483717918396, |
|
"rewards/rejected": -2.2609429359436035, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.910662824207493, |
|
"grad_norm": 24.25889585836036, |
|
"learning_rate": 1.3498780186031455e-10, |
|
"logits/chosen": -2.002887010574341, |
|
"logits/rejected": -1.9993667602539062, |
|
"logps/chosen": -1.1712630987167358, |
|
"logps/rejected": -1.301621675491333, |
|
"loss": 1.65, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.3425261974334717, |
|
"rewards/margins": 0.26071739196777344, |
|
"rewards/rejected": -2.603243350982666, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.9178674351585014, |
|
"grad_norm": 19.7696756240332, |
|
"learning_rate": 1.1410939887425141e-10, |
|
"logits/chosen": -1.9911212921142578, |
|
"logits/rejected": -1.993401288986206, |
|
"logps/chosen": -1.0608654022216797, |
|
"logps/rejected": -1.1971817016601562, |
|
"loss": 1.6356, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.1217308044433594, |
|
"rewards/margins": 0.2726329267024994, |
|
"rewards/rejected": -2.3943634033203125, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.92507204610951, |
|
"grad_norm": 23.18031609093465, |
|
"learning_rate": 9.498037123825686e-11, |
|
"logits/chosen": -2.007997989654541, |
|
"logits/rejected": -2.0048532485961914, |
|
"logps/chosen": -1.0325133800506592, |
|
"logps/rejected": -1.1695791482925415, |
|
"loss": 1.6193, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0650267601013184, |
|
"rewards/margins": 0.2741314768791199, |
|
"rewards/rejected": -2.339158296585083, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.9322766570605188, |
|
"grad_norm": 25.713514771607556, |
|
"learning_rate": 7.760206364398614e-11, |
|
"logits/chosen": -2.058072328567505, |
|
"logits/rejected": -2.0552659034729004, |
|
"logps/chosen": -1.0893595218658447, |
|
"logps/rejected": -1.244717001914978, |
|
"loss": 1.6012, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1787190437316895, |
|
"rewards/margins": 0.3107149004936218, |
|
"rewards/rejected": -2.489434003829956, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.9394812680115274, |
|
"grad_norm": 26.762519657648443, |
|
"learning_rate": 6.19756977147029e-11, |
|
"logits/chosen": -1.9917011260986328, |
|
"logits/rejected": -1.9884628057479858, |
|
"logps/chosen": -1.040903925895691, |
|
"logps/rejected": -1.26100492477417, |
|
"loss": 1.5122, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.081807851791382, |
|
"rewards/margins": 0.4402018189430237, |
|
"rewards/rejected": -2.52200984954834, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.946685878962536, |
|
"grad_norm": 25.259734222354364, |
|
"learning_rate": 4.810237191940625e-11, |
|
"logits/chosen": -1.9693422317504883, |
|
"logits/rejected": -1.9684251546859741, |
|
"logps/chosen": -1.0511581897735596, |
|
"logps/rejected": -1.1897703409194946, |
|
"loss": 1.6405, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.102316379547119, |
|
"rewards/margins": 0.27722451090812683, |
|
"rewards/rejected": -2.3795406818389893, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.9538904899135447, |
|
"grad_norm": 23.752154925872336, |
|
"learning_rate": 3.5983061495617476e-11, |
|
"logits/chosen": -2.024099349975586, |
|
"logits/rejected": -2.0245394706726074, |
|
"logps/chosen": -1.1337699890136719, |
|
"logps/rejected": -1.2917104959487915, |
|
"loss": 1.6, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.2675399780273438, |
|
"rewards/margins": 0.3158808946609497, |
|
"rewards/rejected": -2.583420991897583, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.9610951008645534, |
|
"grad_norm": 26.090688744328226, |
|
"learning_rate": 2.5618618380812694e-11, |
|
"logits/chosen": -2.0161375999450684, |
|
"logits/rejected": -2.005847930908203, |
|
"logps/chosen": -1.0120784044265747, |
|
"logps/rejected": -1.1921513080596924, |
|
"loss": 1.5759, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0241568088531494, |
|
"rewards/margins": 0.3601462244987488, |
|
"rewards/rejected": -2.3843026161193848, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.968299711815562, |
|
"grad_norm": 27.27849102688369, |
|
"learning_rate": 1.700977115254576e-11, |
|
"logits/chosen": -1.9999454021453857, |
|
"logits/rejected": -1.9970006942749023, |
|
"logps/chosen": -1.0086463689804077, |
|
"logps/rejected": -1.1693059206008911, |
|
"loss": 1.5846, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0172927379608154, |
|
"rewards/margins": 0.32131898403167725, |
|
"rewards/rejected": -2.3386118412017822, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.9755043227665707, |
|
"grad_norm": 24.842873331046228, |
|
"learning_rate": 1.0157124977230868e-11, |
|
"logits/chosen": -1.9786033630371094, |
|
"logits/rejected": -1.977442979812622, |
|
"logps/chosen": -0.9792217016220093, |
|
"logps/rejected": -1.137369990348816, |
|
"loss": 1.5873, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9584434032440186, |
|
"rewards/margins": 0.3162967264652252, |
|
"rewards/rejected": -2.274739980697632, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.9827089337175794, |
|
"grad_norm": 25.86082420427953, |
|
"learning_rate": 5.061161567596061e-12, |
|
"logits/chosen": -1.9983164072036743, |
|
"logits/rejected": -1.9942150115966797, |
|
"logps/chosen": -1.0641577243804932, |
|
"logps/rejected": -1.1579291820526123, |
|
"loss": 1.6924, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.1283154487609863, |
|
"rewards/margins": 0.18754300475120544, |
|
"rewards/rejected": -2.3158583641052246, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.989913544668588, |
|
"grad_norm": 25.593515700161753, |
|
"learning_rate": 1.7222391488297406e-12, |
|
"logits/chosen": -2.018054246902466, |
|
"logits/rejected": -2.0146076679229736, |
|
"logps/chosen": -1.1173573732376099, |
|
"logps/rejected": -1.27747642993927, |
|
"loss": 1.591, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.2347147464752197, |
|
"rewards/margins": 0.32023778557777405, |
|
"rewards/rejected": -2.55495285987854, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.9971181556195967, |
|
"grad_norm": 23.587314737717417, |
|
"learning_rate": 1.4059243338693238e-13, |
|
"logits/chosen": -1.99405038356781, |
|
"logits/rejected": -1.9872567653656006, |
|
"logps/chosen": -1.06746244430542, |
|
"logps/rejected": -1.2030757665634155, |
|
"loss": 1.6158, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.13492488861084, |
|
"rewards/margins": 0.27122682332992554, |
|
"rewards/rejected": -2.406151533126831, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4164, |
|
"total_flos": 0.0, |
|
"train_loss": 1.6295961157904577, |
|
"train_runtime": 5477.0573, |
|
"train_samples_per_second": 12.161, |
|
"train_steps_per_second": 0.76 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4164, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|