|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9964868029907215, |
|
"eval_steps": 800, |
|
"global_step": 2079, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0014413115935501305, |
|
"grad_norm": 21.287893295288086, |
|
"learning_rate": 2.403846153846154e-09, |
|
"logits/chosen": -2.3065450191497803, |
|
"logits/rejected": -2.3093364238739014, |
|
"logps/chosen": -43.837303161621094, |
|
"logps/rejected": -48.05693054199219, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.0625, |
|
"rewards/chosen": 9.900308214128017e-06, |
|
"rewards/margins": 0.0009647191036492586, |
|
"rewards/rejected": -0.0009548187954351306, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.014413115935501306, |
|
"grad_norm": 21.087011337280273, |
|
"learning_rate": 2.403846153846154e-08, |
|
"logits/chosen": -2.3277149200439453, |
|
"logits/rejected": -2.3011789321899414, |
|
"logps/chosen": -42.81745910644531, |
|
"logps/rejected": -44.89339065551758, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": -0.001182637526653707, |
|
"rewards/margins": 0.0011362915392965078, |
|
"rewards/rejected": -0.0023189291823655367, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02882623187100261, |
|
"grad_norm": 19.580371856689453, |
|
"learning_rate": 4.807692307692308e-08, |
|
"logits/chosen": -2.2883663177490234, |
|
"logits/rejected": -2.2757415771484375, |
|
"logps/chosen": -45.45596694946289, |
|
"logps/rejected": -48.15468978881836, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.0008312638965435326, |
|
"rewards/margins": -0.0028335480019450188, |
|
"rewards/rejected": 0.0020022839307785034, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04323934780650392, |
|
"grad_norm": 26.840009689331055, |
|
"learning_rate": 7.21153846153846e-08, |
|
"logits/chosen": -2.315314531326294, |
|
"logits/rejected": -2.3029096126556396, |
|
"logps/chosen": -46.84910202026367, |
|
"logps/rejected": -48.4326286315918, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.00018312002066522837, |
|
"rewards/margins": 0.0015712290769442916, |
|
"rewards/rejected": -0.00175434909760952, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05765246374200522, |
|
"grad_norm": 22.58620834350586, |
|
"learning_rate": 9.615384615384616e-08, |
|
"logits/chosen": -2.347716808319092, |
|
"logits/rejected": -2.338416576385498, |
|
"logps/chosen": -50.591617584228516, |
|
"logps/rejected": -52.742095947265625, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.0013993385946378112, |
|
"rewards/margins": 0.0010021533817052841, |
|
"rewards/rejected": 0.0003971853293478489, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07206557967750653, |
|
"grad_norm": 23.823856353759766, |
|
"learning_rate": 1.2019230769230769e-07, |
|
"logits/chosen": -2.329172134399414, |
|
"logits/rejected": -2.3224873542785645, |
|
"logps/chosen": -47.3341178894043, |
|
"logps/rejected": -49.947471618652344, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.002387039829045534, |
|
"rewards/margins": 0.0015624122461304069, |
|
"rewards/rejected": 0.0008246281067840755, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08647869561300783, |
|
"grad_norm": 18.55199432373047, |
|
"learning_rate": 1.442307692307692e-07, |
|
"logits/chosen": -2.3057174682617188, |
|
"logits/rejected": -2.287588596343994, |
|
"logps/chosen": -46.57988739013672, |
|
"logps/rejected": -48.87944793701172, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.001985303359106183, |
|
"rewards/margins": 0.0009407905163243413, |
|
"rewards/rejected": 0.00104451272636652, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10089181154850914, |
|
"grad_norm": 18.088035583496094, |
|
"learning_rate": 1.6826923076923077e-07, |
|
"logits/chosen": -2.3419766426086426, |
|
"logits/rejected": -2.3270087242126465, |
|
"logps/chosen": -47.5944709777832, |
|
"logps/rejected": -50.76883316040039, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0018801375990733504, |
|
"rewards/margins": 0.0026488774456083775, |
|
"rewards/rejected": -0.0007687400793656707, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11530492748401044, |
|
"grad_norm": 18.39251708984375, |
|
"learning_rate": 1.9230769230769231e-07, |
|
"logits/chosen": -2.335756778717041, |
|
"logits/rejected": -2.3095576763153076, |
|
"logps/chosen": -44.72612380981445, |
|
"logps/rejected": -48.02496337890625, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.006220139563083649, |
|
"rewards/margins": 0.007775151636451483, |
|
"rewards/rejected": -0.001555012189783156, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12971804341951176, |
|
"grad_norm": 18.626569747924805, |
|
"learning_rate": 2.1634615384615386e-07, |
|
"logits/chosen": -2.3171792030334473, |
|
"logits/rejected": -2.291064500808716, |
|
"logps/chosen": -44.88652420043945, |
|
"logps/rejected": -46.83210372924805, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.008590105921030045, |
|
"rewards/margins": 0.00671065878123045, |
|
"rewards/rejected": 0.0018794465577229857, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14413115935501306, |
|
"grad_norm": 23.2231388092041, |
|
"learning_rate": 2.4038461538461537e-07, |
|
"logits/chosen": -2.383881092071533, |
|
"logits/rejected": -2.377704620361328, |
|
"logps/chosen": -42.710289001464844, |
|
"logps/rejected": -46.196533203125, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.014623338356614113, |
|
"rewards/margins": 0.008355258964002132, |
|
"rewards/rejected": 0.006268080323934555, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15854427529051437, |
|
"grad_norm": 19.625394821166992, |
|
"learning_rate": 2.6442307692307694e-07, |
|
"logits/chosen": -2.310715436935425, |
|
"logits/rejected": -2.3002336025238037, |
|
"logps/chosen": -45.03856658935547, |
|
"logps/rejected": -47.96285629272461, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": 0.01648247428238392, |
|
"rewards/margins": 0.015830885618925095, |
|
"rewards/rejected": 0.0006515888380818069, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17295739122601567, |
|
"grad_norm": 22.700777053833008, |
|
"learning_rate": 2.884615384615384e-07, |
|
"logits/chosen": -2.339622974395752, |
|
"logits/rejected": -2.326411485671997, |
|
"logps/chosen": -46.59340286254883, |
|
"logps/rejected": -49.68640899658203, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.020804349333047867, |
|
"rewards/margins": 0.016823848709464073, |
|
"rewards/rejected": 0.003980500157922506, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18737050716151699, |
|
"grad_norm": 25.500030517578125, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.286569833755493, |
|
"logits/rejected": -2.2731103897094727, |
|
"logps/chosen": -49.55046844482422, |
|
"logps/rejected": -51.0811767578125, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": 0.030641257762908936, |
|
"rewards/margins": 0.027367204427719116, |
|
"rewards/rejected": 0.0032740526366978884, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.20178362309701828, |
|
"grad_norm": 19.97886848449707, |
|
"learning_rate": 3.3653846153846154e-07, |
|
"logits/chosen": -2.340399980545044, |
|
"logits/rejected": -2.3109829425811768, |
|
"logps/chosen": -44.300235748291016, |
|
"logps/rejected": -46.56055450439453, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.032056886702775955, |
|
"rewards/margins": 0.037834975868463516, |
|
"rewards/rejected": -0.005778087303042412, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2161967390325196, |
|
"grad_norm": 19.921979904174805, |
|
"learning_rate": 3.6057692307692306e-07, |
|
"logits/chosen": -2.3215255737304688, |
|
"logits/rejected": -2.297445297241211, |
|
"logps/chosen": -46.028289794921875, |
|
"logps/rejected": -48.2182731628418, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.03457826003432274, |
|
"rewards/margins": 0.03234432265162468, |
|
"rewards/rejected": 0.002233942272141576, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2306098549680209, |
|
"grad_norm": 21.054500579833984, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -2.2929115295410156, |
|
"logits/rejected": -2.277684450149536, |
|
"logps/chosen": -47.141380310058594, |
|
"logps/rejected": -50.77402114868164, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": 0.04410446435213089, |
|
"rewards/margins": 0.04075505584478378, |
|
"rewards/rejected": 0.0033494061790406704, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2450229709035222, |
|
"grad_norm": 20.726028442382812, |
|
"learning_rate": 4.0865384615384614e-07, |
|
"logits/chosen": -2.33288836479187, |
|
"logits/rejected": -2.3155179023742676, |
|
"logps/chosen": -50.497257232666016, |
|
"logps/rejected": -50.81693649291992, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.04269781708717346, |
|
"rewards/margins": 0.040725283324718475, |
|
"rewards/rejected": 0.0019725344609469175, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2594360868390235, |
|
"grad_norm": 23.03353500366211, |
|
"learning_rate": 4.326923076923077e-07, |
|
"logits/chosen": -2.286454677581787, |
|
"logits/rejected": -2.268066883087158, |
|
"logps/chosen": -49.3195915222168, |
|
"logps/rejected": -52.67781448364258, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.046625155955553055, |
|
"rewards/margins": 0.056254588067531586, |
|
"rewards/rejected": -0.009629428386688232, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2738492027745248, |
|
"grad_norm": 21.245960235595703, |
|
"learning_rate": 4.567307692307692e-07, |
|
"logits/chosen": -2.3012168407440186, |
|
"logits/rejected": -2.287529468536377, |
|
"logps/chosen": -47.20839309692383, |
|
"logps/rejected": -50.67589569091797, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.046871501952409744, |
|
"rewards/margins": 0.06035640090703964, |
|
"rewards/rejected": -0.013484900817275047, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2882623187100261, |
|
"grad_norm": 23.244338989257812, |
|
"learning_rate": 4.807692307692307e-07, |
|
"logits/chosen": -2.324427843093872, |
|
"logits/rejected": -2.3083744049072266, |
|
"logps/chosen": -46.15428924560547, |
|
"logps/rejected": -49.030723571777344, |
|
"loss": 0.6683, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.040220100432634354, |
|
"rewards/margins": 0.06216844171285629, |
|
"rewards/rejected": -0.02194834314286709, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.30267543464552743, |
|
"grad_norm": 20.938106536865234, |
|
"learning_rate": 4.999985903160127e-07, |
|
"logits/chosen": -2.3429839611053467, |
|
"logits/rejected": -2.3429884910583496, |
|
"logps/chosen": -46.21355438232422, |
|
"logps/rejected": -49.741477966308594, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": 0.03884850814938545, |
|
"rewards/margins": 0.08172162622213364, |
|
"rewards/rejected": -0.04287312179803848, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.31708855058102875, |
|
"grad_norm": 22.547351837158203, |
|
"learning_rate": 4.999492530456938e-07, |
|
"logits/chosen": -2.2629776000976562, |
|
"logits/rejected": -2.247462034225464, |
|
"logps/chosen": -45.79121780395508, |
|
"logps/rejected": -48.56629943847656, |
|
"loss": 0.6543, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.02658682130277157, |
|
"rewards/margins": 0.10048248618841171, |
|
"rewards/rejected": -0.07389567047357559, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.33150166651653007, |
|
"grad_norm": 28.69328498840332, |
|
"learning_rate": 4.998294474728773e-07, |
|
"logits/chosen": -2.3137152194976807, |
|
"logits/rejected": -2.2834296226501465, |
|
"logps/chosen": -53.14280319213867, |
|
"logps/rejected": -54.3192024230957, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.03892205283045769, |
|
"rewards/margins": 0.12172921746969223, |
|
"rewards/rejected": -0.08280716836452484, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.34591478245203133, |
|
"grad_norm": 23.599994659423828, |
|
"learning_rate": 4.996392073744008e-07, |
|
"logits/chosen": -2.3293533325195312, |
|
"logits/rejected": -2.2977442741394043, |
|
"logps/chosen": -47.376712799072266, |
|
"logps/rejected": -50.33088302612305, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": 0.02417893335223198, |
|
"rewards/margins": 0.12420248985290527, |
|
"rewards/rejected": -0.1000235453248024, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.36032789838753265, |
|
"grad_norm": 21.964677810668945, |
|
"learning_rate": 4.993785863847387e-07, |
|
"logits/chosen": -2.2910289764404297, |
|
"logits/rejected": -2.266993284225464, |
|
"logps/chosen": -44.49908447265625, |
|
"logps/rejected": -49.51002883911133, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.010445142164826393, |
|
"rewards/margins": 0.18903522193431854, |
|
"rewards/rejected": -0.1785901039838791, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.37474101432303397, |
|
"grad_norm": 24.81599998474121, |
|
"learning_rate": 4.99047657980881e-07, |
|
"logits/chosen": -2.2835147380828857, |
|
"logits/rejected": -2.2653086185455322, |
|
"logps/chosen": -50.46863555908203, |
|
"logps/rejected": -54.02223587036133, |
|
"loss": 0.6484, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.045146115124225616, |
|
"rewards/margins": 0.15290267765522003, |
|
"rewards/rejected": -0.19804877042770386, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3891541302585353, |
|
"grad_norm": 26.260005950927734, |
|
"learning_rate": 4.986465154616175e-07, |
|
"logits/chosen": -2.2700905799865723, |
|
"logits/rejected": -2.242027759552002, |
|
"logps/chosen": -46.41443634033203, |
|
"logps/rejected": -49.876991271972656, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.017268601804971695, |
|
"rewards/margins": 0.1998191624879837, |
|
"rewards/rejected": -0.2170877754688263, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.40356724619403656, |
|
"grad_norm": 24.382686614990234, |
|
"learning_rate": 4.981752719212347e-07, |
|
"logits/chosen": -2.2248587608337402, |
|
"logits/rejected": -2.210576295852661, |
|
"logps/chosen": -48.873863220214844, |
|
"logps/rejected": -51.1252555847168, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03884928673505783, |
|
"rewards/margins": 0.15560956299304962, |
|
"rewards/rejected": -0.19445885717868805, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4179803621295379, |
|
"grad_norm": 21.578123092651367, |
|
"learning_rate": 4.976340602176303e-07, |
|
"logits/chosen": -2.2483863830566406, |
|
"logits/rejected": -2.216209650039673, |
|
"logps/chosen": -48.28716278076172, |
|
"logps/rejected": -52.282020568847656, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.06292366981506348, |
|
"rewards/margins": 0.23313823342323303, |
|
"rewards/rejected": -0.2960619330406189, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4323934780650392, |
|
"grad_norm": 26.12675666809082, |
|
"learning_rate": 4.970230329348574e-07, |
|
"logits/chosen": -2.246577262878418, |
|
"logits/rejected": -2.2352359294891357, |
|
"logps/chosen": -47.72701644897461, |
|
"logps/rejected": -55.13080596923828, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.11627298593521118, |
|
"rewards/margins": 0.2843998968601227, |
|
"rewards/rejected": -0.4006728231906891, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4468065940005405, |
|
"grad_norm": 23.437192916870117, |
|
"learning_rate": 4.963423623401058e-07, |
|
"logits/chosen": -2.2119696140289307, |
|
"logits/rejected": -2.1862404346466064, |
|
"logps/chosen": -47.60432434082031, |
|
"logps/rejected": -51.17278289794922, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.10928714275360107, |
|
"rewards/margins": 0.22730882465839386, |
|
"rewards/rejected": -0.33659598231315613, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4612197099360418, |
|
"grad_norm": 25.831727981567383, |
|
"learning_rate": 4.955922403351345e-07, |
|
"logits/chosen": -2.204767942428589, |
|
"logits/rejected": -2.196760892868042, |
|
"logps/chosen": -45.66215896606445, |
|
"logps/rejected": -51.39593505859375, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.13322503864765167, |
|
"rewards/margins": 0.2752975821495056, |
|
"rewards/rejected": -0.4085226058959961, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4756328258715431, |
|
"grad_norm": 27.61099624633789, |
|
"learning_rate": 4.947728784021693e-07, |
|
"logits/chosen": -2.219931125640869, |
|
"logits/rejected": -2.2032651901245117, |
|
"logps/chosen": -47.976158142089844, |
|
"logps/rejected": -52.288734436035156, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.15533845126628876, |
|
"rewards/margins": 0.29920583963394165, |
|
"rewards/rejected": -0.4545443654060364, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.4900459418070444, |
|
"grad_norm": 27.884634017944336, |
|
"learning_rate": 4.938845075442793e-07, |
|
"logits/chosen": -2.1479032039642334, |
|
"logits/rejected": -2.1212592124938965, |
|
"logps/chosen": -51.559059143066406, |
|
"logps/rejected": -55.076637268066406, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.16438238322734833, |
|
"rewards/margins": 0.29998037219047546, |
|
"rewards/rejected": -0.4643628001213074, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5044590577425457, |
|
"grad_norm": 21.843393325805664, |
|
"learning_rate": 4.929273782202499e-07, |
|
"logits/chosen": -2.188169002532959, |
|
"logits/rejected": -2.1698548793792725, |
|
"logps/chosen": -49.47840881347656, |
|
"logps/rejected": -55.17237091064453, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.21972529590129852, |
|
"rewards/margins": 0.2996353209018707, |
|
"rewards/rejected": -0.5193605422973633, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.518872173678047, |
|
"grad_norm": 28.664966583251953, |
|
"learning_rate": 4.919017602739709e-07, |
|
"logits/chosen": -2.139723539352417, |
|
"logits/rejected": -2.1237754821777344, |
|
"logps/chosen": -48.365455627441406, |
|
"logps/rejected": -54.59684371948242, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.29085400700569153, |
|
"rewards/margins": 0.37286603450775146, |
|
"rewards/rejected": -0.6637200117111206, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5332852896135484, |
|
"grad_norm": 34.82406997680664, |
|
"learning_rate": 4.908079428583598e-07, |
|
"logits/chosen": -2.1522464752197266, |
|
"logits/rejected": -2.134714126586914, |
|
"logps/chosen": -51.907684326171875, |
|
"logps/rejected": -55.71944046020508, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3955734372138977, |
|
"rewards/margins": 0.3038731813430786, |
|
"rewards/rejected": -0.6994466185569763, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5476984055490496, |
|
"grad_norm": 31.894775390625, |
|
"learning_rate": 4.8964623435384e-07, |
|
"logits/chosen": -2.1320347785949707, |
|
"logits/rejected": -2.1204841136932373, |
|
"logps/chosen": -50.67914962768555, |
|
"logps/rejected": -56.83320999145508, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.33134475350379944, |
|
"rewards/margins": 0.41313639283180237, |
|
"rewards/rejected": -0.7444812059402466, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5621115214845509, |
|
"grad_norm": 22.113506317138672, |
|
"learning_rate": 4.884169622813997e-07, |
|
"logits/chosen": -2.0845208168029785, |
|
"logits/rejected": -2.0749855041503906, |
|
"logps/chosen": -50.413814544677734, |
|
"logps/rejected": -57.11311721801758, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.4426320493221283, |
|
"rewards/margins": 0.4605481028556824, |
|
"rewards/rejected": -0.9031801223754883, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5765246374200522, |
|
"grad_norm": 29.475505828857422, |
|
"learning_rate": 4.87120473210253e-07, |
|
"logits/chosen": -2.1424341201782227, |
|
"logits/rejected": -2.1282877922058105, |
|
"logps/chosen": -52.59001922607422, |
|
"logps/rejected": -59.03765869140625, |
|
"loss": 0.6062, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.4012625813484192, |
|
"rewards/margins": 0.43023520708084106, |
|
"rewards/rejected": -0.8314977884292603, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5909377533555535, |
|
"grad_norm": 31.90033721923828, |
|
"learning_rate": 4.857571326601322e-07, |
|
"logits/chosen": -2.121933698654175, |
|
"logits/rejected": -2.108666181564331, |
|
"logps/chosen": -49.52949523925781, |
|
"logps/rejected": -55.079627990722656, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.35214418172836304, |
|
"rewards/margins": 0.37995508313179016, |
|
"rewards/rejected": -0.7320992350578308, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6053508692910549, |
|
"grad_norm": 22.285457611083984, |
|
"learning_rate": 4.843273249982365e-07, |
|
"logits/chosen": -2.1331048011779785, |
|
"logits/rejected": -2.1248154640197754, |
|
"logps/chosen": -47.69127655029297, |
|
"logps/rejected": -53.04596710205078, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.29617828130722046, |
|
"rewards/margins": 0.39620500802993774, |
|
"rewards/rejected": -0.6923832297325134, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6197639852265562, |
|
"grad_norm": 30.609222412109375, |
|
"learning_rate": 4.828314533308668e-07, |
|
"logits/chosen": -2.1201605796813965, |
|
"logits/rejected": -2.1039023399353027, |
|
"logps/chosen": -55.71925735473633, |
|
"logps/rejected": -61.24989700317383, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.3478863537311554, |
|
"rewards/margins": 0.41252002120018005, |
|
"rewards/rejected": -0.7604063749313354, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6341771011620575, |
|
"grad_norm": 34.9681282043457, |
|
"learning_rate": 4.812699393897779e-07, |
|
"logits/chosen": -2.113286256790161, |
|
"logits/rejected": -2.1023306846618652, |
|
"logps/chosen": -50.840431213378906, |
|
"logps/rejected": -56.244529724121094, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3603426516056061, |
|
"rewards/margins": 0.34980452060699463, |
|
"rewards/rejected": -0.7101471424102783, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6485902170975588, |
|
"grad_norm": 40.18833541870117, |
|
"learning_rate": 4.796432234132802e-07, |
|
"logits/chosen": -2.14215350151062, |
|
"logits/rejected": -2.132025718688965, |
|
"logps/chosen": -52.24169921875, |
|
"logps/rejected": -57.869903564453125, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.34826427698135376, |
|
"rewards/margins": 0.3406437039375305, |
|
"rewards/rejected": -0.6889079809188843, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6630033330330601, |
|
"grad_norm": 29.3867244720459, |
|
"learning_rate": 4.77951764022122e-07, |
|
"logits/chosen": -2.0976574420928955, |
|
"logits/rejected": -2.077042579650879, |
|
"logps/chosen": -49.5283203125, |
|
"logps/rejected": -55.05694580078125, |
|
"loss": 0.5964, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.3073193430900574, |
|
"rewards/margins": 0.3983311057090759, |
|
"rewards/rejected": -0.7056504487991333, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6774164489685613, |
|
"grad_norm": 26.749961853027344, |
|
"learning_rate": 4.7619603809019113e-07, |
|
"logits/chosen": -2.152034282684326, |
|
"logits/rejected": -2.134824275970459, |
|
"logps/chosen": -54.220680236816406, |
|
"logps/rejected": -59.50426483154297, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3346361815929413, |
|
"rewards/margins": 0.3874064087867737, |
|
"rewards/rejected": -0.7220426201820374, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6918295649040627, |
|
"grad_norm": 33.738075256347656, |
|
"learning_rate": 4.7437654061006917e-07, |
|
"logits/chosen": -2.104640007019043, |
|
"logits/rejected": -2.099822521209717, |
|
"logps/chosen": -52.46540069580078, |
|
"logps/rejected": -59.32879638671875, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.4632749557495117, |
|
"rewards/margins": 0.3506197929382324, |
|
"rewards/rejected": -0.8138947486877441, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.706242680839564, |
|
"grad_norm": 26.89373016357422, |
|
"learning_rate": 4.7249378455347857e-07, |
|
"logits/chosen": -2.1046807765960693, |
|
"logits/rejected": -2.099517583847046, |
|
"logps/chosen": -51.28865432739258, |
|
"logps/rejected": -59.29227828979492, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.3435845673084259, |
|
"rewards/margins": 0.4029726982116699, |
|
"rewards/rejected": -0.7465572357177734, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7206557967750653, |
|
"grad_norm": 33.128177642822266, |
|
"learning_rate": 4.7054830072665973e-07, |
|
"logits/chosen": -2.086678981781006, |
|
"logits/rejected": -2.07316255569458, |
|
"logps/chosen": -52.46559524536133, |
|
"logps/rejected": -57.55824661254883, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.34209367632865906, |
|
"rewards/margins": 0.35461074113845825, |
|
"rewards/rejected": -0.6967044472694397, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7350689127105666, |
|
"grad_norm": 30.27744483947754, |
|
"learning_rate": 4.6854063762072106e-07, |
|
"logits/chosen": -2.076245069503784, |
|
"logits/rejected": -2.0618348121643066, |
|
"logps/chosen": -48.739891052246094, |
|
"logps/rejected": -54.260215759277344, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.3690701127052307, |
|
"rewards/margins": 0.3871278464794159, |
|
"rewards/rejected": -0.7561979293823242, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7494820286460679, |
|
"grad_norm": 23.903032302856445, |
|
"learning_rate": 4.664713612570021e-07, |
|
"logits/chosen": -2.0651895999908447, |
|
"logits/rejected": -2.0512535572052, |
|
"logps/chosen": -48.935279846191406, |
|
"logps/rejected": -55.669349670410156, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3753257393836975, |
|
"rewards/margins": 0.43598484992980957, |
|
"rewards/rejected": -0.8113106489181519, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7638951445815693, |
|
"grad_norm": 26.094806671142578, |
|
"learning_rate": 4.6434105502749533e-07, |
|
"logits/chosen": -2.0776233673095703, |
|
"logits/rejected": -2.047234296798706, |
|
"logps/chosen": -48.24291229248047, |
|
"logps/rejected": -54.696556091308594, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.36777353286743164, |
|
"rewards/margins": 0.4470536708831787, |
|
"rewards/rejected": -0.8148272633552551, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7783082605170706, |
|
"grad_norm": 29.16175079345703, |
|
"learning_rate": 4.621503195303701e-07, |
|
"logits/chosen": -2.1113364696502686, |
|
"logits/rejected": -2.104959726333618, |
|
"logps/chosen": -54.480430603027344, |
|
"logps/rejected": -61.843658447265625, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.31826671957969666, |
|
"rewards/margins": 0.41609320044517517, |
|
"rewards/rejected": -0.7343599200248718, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7927213764525718, |
|
"grad_norm": 27.188947677612305, |
|
"learning_rate": 4.598997724006456e-07, |
|
"logits/chosen": -2.115569591522217, |
|
"logits/rejected": -2.097104072570801, |
|
"logps/chosen": -53.17041778564453, |
|
"logps/rejected": -60.1922721862793, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.2969083786010742, |
|
"rewards/margins": 0.47897768020629883, |
|
"rewards/rejected": -0.7758861184120178, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8071344923880731, |
|
"grad_norm": 28.390932083129883, |
|
"learning_rate": 4.5759004813606083e-07, |
|
"logits/chosen": -2.035731554031372, |
|
"logits/rejected": -2.0177547931671143, |
|
"logps/chosen": -50.56719970703125, |
|
"logps/rejected": -56.478172302246094, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.4207974374294281, |
|
"rewards/margins": 0.46362677216529846, |
|
"rewards/rejected": -0.8844242095947266, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8215476083235744, |
|
"grad_norm": 26.5084285736084, |
|
"learning_rate": 4.5522179791819036e-07, |
|
"logits/chosen": -2.0884745121002197, |
|
"logits/rejected": -2.078564167022705, |
|
"logps/chosen": -49.79915237426758, |
|
"logps/rejected": -55.561363220214844, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.4339517652988434, |
|
"rewards/margins": 0.40265020728111267, |
|
"rewards/rejected": -0.8366019129753113, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.8359607242590757, |
|
"grad_norm": 28.170623779296875, |
|
"learning_rate": 4.527956894288564e-07, |
|
"logits/chosen": -2.0642967224121094, |
|
"logits/rejected": -2.0509092807769775, |
|
"logps/chosen": -53.66132354736328, |
|
"logps/rejected": -58.7640266418457, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.3676076829433441, |
|
"rewards/margins": 0.39320507645606995, |
|
"rewards/rejected": -0.7608126401901245, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8503738401945771, |
|
"grad_norm": 39.17332077026367, |
|
"learning_rate": 4.503124066618891e-07, |
|
"logits/chosen": -2.1024057865142822, |
|
"logits/rejected": -2.072375535964966, |
|
"logps/chosen": -52.28911590576172, |
|
"logps/rejected": -56.1530647277832, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.4251914918422699, |
|
"rewards/margins": 0.3595213294029236, |
|
"rewards/rejected": -0.7847127914428711, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8647869561300784, |
|
"grad_norm": 30.457704544067383, |
|
"learning_rate": 4.4777264973028763e-07, |
|
"logits/chosen": -2.0802459716796875, |
|
"logits/rejected": -2.054129123687744, |
|
"logps/chosen": -55.20709228515625, |
|
"logps/rejected": -60.87419891357422, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.45640072226524353, |
|
"rewards/margins": 0.43457716703414917, |
|
"rewards/rejected": -0.8909778594970703, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8792000720655797, |
|
"grad_norm": 24.48934555053711, |
|
"learning_rate": 4.4517713466883733e-07, |
|
"logits/chosen": -2.0442166328430176, |
|
"logits/rejected": -2.0350658893585205, |
|
"logps/chosen": -49.768959045410156, |
|
"logps/rejected": -56.72099685668945, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.44116124510765076, |
|
"rewards/margins": 0.39726558327674866, |
|
"rewards/rejected": -0.8384267687797546, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.893613188001081, |
|
"grad_norm": 31.750471115112305, |
|
"learning_rate": 4.425265932322374e-07, |
|
"logits/chosen": -2.050736904144287, |
|
"logits/rejected": -2.0227198600769043, |
|
"logps/chosen": -50.26643753051758, |
|
"logps/rejected": -57.786712646484375, |
|
"loss": 0.5635, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.40668320655822754, |
|
"rewards/margins": 0.5360392332077026, |
|
"rewards/rejected": -0.9427223205566406, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9080263039365822, |
|
"grad_norm": 26.52891731262207, |
|
"learning_rate": 4.3982177268879713e-07, |
|
"logits/chosen": -2.0627334117889404, |
|
"logits/rejected": -2.0695691108703613, |
|
"logps/chosen": -53.98938751220703, |
|
"logps/rejected": -62.720008850097656, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.5125323534011841, |
|
"rewards/margins": 0.3868643641471863, |
|
"rewards/rejected": -0.8993967771530151, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9224394198720836, |
|
"grad_norm": 33.650299072265625, |
|
"learning_rate": 4.370634356097582e-07, |
|
"logits/chosen": -2.04744553565979, |
|
"logits/rejected": -2.026310443878174, |
|
"logps/chosen": -51.46614456176758, |
|
"logps/rejected": -59.32075881958008, |
|
"loss": 0.5673, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.5189432501792908, |
|
"rewards/margins": 0.5335994958877563, |
|
"rewards/rejected": -1.0525426864624023, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9368525358075849, |
|
"grad_norm": 27.66231346130371, |
|
"learning_rate": 4.3425235965430267e-07, |
|
"logits/chosen": -2.044187545776367, |
|
"logits/rejected": -2.0403525829315186, |
|
"logps/chosen": -53.0405158996582, |
|
"logps/rejected": -59.9106330871582, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6265466213226318, |
|
"rewards/margins": 0.45962271094322205, |
|
"rewards/rejected": -1.0861692428588867, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9512656517430862, |
|
"grad_norm": 20.190792083740234, |
|
"learning_rate": 4.3138933735030723e-07, |
|
"logits/chosen": -2.0359790325164795, |
|
"logits/rejected": -2.0277514457702637, |
|
"logps/chosen": -50.206233978271484, |
|
"logps/rejected": -58.02119827270508, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5592349767684937, |
|
"rewards/margins": 0.50580894947052, |
|
"rewards/rejected": -1.0650438070297241, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9656787676785875, |
|
"grad_norm": 27.47838020324707, |
|
"learning_rate": 4.284751758709052e-07, |
|
"logits/chosen": -2.0135226249694824, |
|
"logits/rejected": -1.993334174156189, |
|
"logps/chosen": -55.60878372192383, |
|
"logps/rejected": -62.4648323059082, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.6603598594665527, |
|
"rewards/margins": 0.49340876936912537, |
|
"rewards/rejected": -1.153768539428711, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9800918836140888, |
|
"grad_norm": 27.680631637573242, |
|
"learning_rate": 4.255106968069201e-07, |
|
"logits/chosen": -1.9804503917694092, |
|
"logits/rejected": -1.9623844623565674, |
|
"logps/chosen": -53.62810134887695, |
|
"logps/rejected": -59.58030319213867, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.5149959325790405, |
|
"rewards/margins": 0.5002261400222778, |
|
"rewards/rejected": -1.0152220726013184, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9945049995495902, |
|
"grad_norm": 27.70201873779297, |
|
"learning_rate": 4.2249673593523427e-07, |
|
"logits/chosen": -2.009028673171997, |
|
"logits/rejected": -1.9938583374023438, |
|
"logps/chosen": -51.165802001953125, |
|
"logps/rejected": -57.561912536621094, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5677297711372375, |
|
"rewards/margins": 0.3783959150314331, |
|
"rewards/rejected": -0.9461256265640259, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0089181154850915, |
|
"grad_norm": 22.3559627532959, |
|
"learning_rate": 4.194341429831576e-07, |
|
"logits/chosen": -1.9959580898284912, |
|
"logits/rejected": -1.9894912242889404, |
|
"logps/chosen": -50.8765754699707, |
|
"logps/rejected": -58.38984298706055, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.47180747985839844, |
|
"rewards/margins": 0.547687828540802, |
|
"rewards/rejected": -1.0194952487945557, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0233312314205927, |
|
"grad_norm": 26.511709213256836, |
|
"learning_rate": 4.163237813888639e-07, |
|
"logits/chosen": -1.9872064590454102, |
|
"logits/rejected": -1.9805561304092407, |
|
"logps/chosen": -51.93730545043945, |
|
"logps/rejected": -61.05963134765625, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.45482367277145386, |
|
"rewards/margins": 0.6706832647323608, |
|
"rewards/rejected": -1.1255069971084595, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.037744347356094, |
|
"grad_norm": 23.604045867919922, |
|
"learning_rate": 4.1316652805796103e-07, |
|
"logits/chosen": -2.0082569122314453, |
|
"logits/rejected": -1.9880993366241455, |
|
"logps/chosen": -53.464210510253906, |
|
"logps/rejected": -60.89324951171875, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5722960233688354, |
|
"rewards/margins": 0.7038034796714783, |
|
"rewards/rejected": -1.276099443435669, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0521574632915953, |
|
"grad_norm": 27.071109771728516, |
|
"learning_rate": 4.09963273116265e-07, |
|
"logits/chosen": -1.957069993019104, |
|
"logits/rejected": -1.946636438369751, |
|
"logps/chosen": -51.59601974487305, |
|
"logps/rejected": -63.518707275390625, |
|
"loss": 0.5106, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6144558787345886, |
|
"rewards/margins": 0.7481251358985901, |
|
"rewards/rejected": -1.3625810146331787, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.0665705792270967, |
|
"grad_norm": 25.10072135925293, |
|
"learning_rate": 4.0671491965884575e-07, |
|
"logits/chosen": -1.903646469116211, |
|
"logits/rejected": -1.879151701927185, |
|
"logps/chosen": -50.908939361572266, |
|
"logps/rejected": -60.83250045776367, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.5956512689590454, |
|
"rewards/margins": 0.7306423783302307, |
|
"rewards/rejected": -1.326293706893921, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.080983695162598, |
|
"grad_norm": 27.48199462890625, |
|
"learning_rate": 4.034223834954178e-07, |
|
"logits/chosen": -1.883504867553711, |
|
"logits/rejected": -1.8606479167938232, |
|
"logps/chosen": -56.974342346191406, |
|
"logps/rejected": -66.8110580444336, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.7325400114059448, |
|
"rewards/margins": 0.8243793249130249, |
|
"rewards/rejected": -1.5569193363189697, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0953968110980994, |
|
"grad_norm": 37.682456970214844, |
|
"learning_rate": 4.000865928921453e-07, |
|
"logits/chosen": -1.8376388549804688, |
|
"logits/rejected": -1.8330386877059937, |
|
"logps/chosen": -57.87821578979492, |
|
"logps/rejected": -65.87061309814453, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9233464002609253, |
|
"rewards/margins": 0.7766343355178833, |
|
"rewards/rejected": -1.6999807357788086, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.1098099270336006, |
|
"grad_norm": 29.70059585571289, |
|
"learning_rate": 3.967084883099356e-07, |
|
"logits/chosen": -1.843123435974121, |
|
"logits/rejected": -1.8221601247787476, |
|
"logps/chosen": -56.51304244995117, |
|
"logps/rejected": -67.86689758300781, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9158357381820679, |
|
"rewards/margins": 0.8356377482414246, |
|
"rewards/rejected": -1.7514736652374268, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.1242230429691018, |
|
"grad_norm": 35.48520278930664, |
|
"learning_rate": 3.932890221392945e-07, |
|
"logits/chosen": -1.8560593128204346, |
|
"logits/rejected": -1.8394947052001953, |
|
"logps/chosen": -58.27685546875, |
|
"logps/rejected": -69.38211822509766, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.931163489818573, |
|
"rewards/margins": 0.8966981768608093, |
|
"rewards/rejected": -1.8278617858886719, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.1386361589046032, |
|
"grad_norm": 29.43699073791504, |
|
"learning_rate": 3.8982915843181873e-07, |
|
"logits/chosen": -1.8902702331542969, |
|
"logits/rejected": -1.8777059316635132, |
|
"logps/chosen": -57.33638381958008, |
|
"logps/rejected": -68.3757095336914, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8548051714897156, |
|
"rewards/margins": 0.8410500288009644, |
|
"rewards/rejected": -1.6958551406860352, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.1530492748401044, |
|
"grad_norm": 34.55091094970703, |
|
"learning_rate": 3.8632987262840035e-07, |
|
"logits/chosen": -1.842508316040039, |
|
"logits/rejected": -1.8238246440887451, |
|
"logps/chosen": -56.97047805786133, |
|
"logps/rejected": -67.20683288574219, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9612547755241394, |
|
"rewards/margins": 0.7814940214157104, |
|
"rewards/rejected": -1.7427488565444946, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1674623907756059, |
|
"grad_norm": 33.40706253051758, |
|
"learning_rate": 3.8279215128422e-07, |
|
"logits/chosen": -1.911771535873413, |
|
"logits/rejected": -1.9070079326629639, |
|
"logps/chosen": -57.28943634033203, |
|
"logps/rejected": -66.23017883300781, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8022342920303345, |
|
"rewards/margins": 0.6388766765594482, |
|
"rewards/rejected": -1.4411109685897827, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.181875506711107, |
|
"grad_norm": 26.427078247070312, |
|
"learning_rate": 3.792169917906075e-07, |
|
"logits/chosen": -1.8850971460342407, |
|
"logits/rejected": -1.872187852859497, |
|
"logps/chosen": -56.62986373901367, |
|
"logps/rejected": -63.857086181640625, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7205672264099121, |
|
"rewards/margins": 0.7504978179931641, |
|
"rewards/rejected": -1.4710649251937866, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.1962886226466085, |
|
"grad_norm": 36.381370544433594, |
|
"learning_rate": 3.7560540209384623e-07, |
|
"logits/chosen": -1.8158347606658936, |
|
"logits/rejected": -1.8129236698150635, |
|
"logps/chosen": -51.83489227294922, |
|
"logps/rejected": -60.24357986450195, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.8396095037460327, |
|
"rewards/margins": 0.7031906843185425, |
|
"rewards/rejected": -1.5428001880645752, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.2107017385821097, |
|
"grad_norm": 19.65403938293457, |
|
"learning_rate": 3.719584004110028e-07, |
|
"logits/chosen": -1.8365529775619507, |
|
"logits/rejected": -1.8232309818267822, |
|
"logps/chosen": -55.054298400878906, |
|
"logps/rejected": -66.90823364257812, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.955776572227478, |
|
"rewards/margins": 0.8788663744926453, |
|
"rewards/rejected": -1.8346431255340576, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.225114854517611, |
|
"grad_norm": 23.882118225097656, |
|
"learning_rate": 3.6827701494286073e-07, |
|
"logits/chosen": -1.7997219562530518, |
|
"logits/rejected": -1.7894538640975952, |
|
"logps/chosen": -61.450218200683594, |
|
"logps/rejected": -72.72858428955078, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.0577744245529175, |
|
"rewards/margins": 0.9473785161972046, |
|
"rewards/rejected": -2.005152940750122, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.2395279704531124, |
|
"grad_norm": 36.03754806518555, |
|
"learning_rate": 3.6456228358403906e-07, |
|
"logits/chosen": -1.7837250232696533, |
|
"logits/rejected": -1.7650636434555054, |
|
"logps/chosen": -52.76055908203125, |
|
"logps/rejected": -64.60010528564453, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.9400711059570312, |
|
"rewards/margins": 0.9297584295272827, |
|
"rewards/rejected": -1.869829535484314, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.2539410863886136, |
|
"grad_norm": 26.958053588867188, |
|
"learning_rate": 3.608152536303784e-07, |
|
"logits/chosen": -1.8296825885772705, |
|
"logits/rejected": -1.8235883712768555, |
|
"logps/chosen": -57.7917594909668, |
|
"logps/rejected": -69.37285614013672, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.9810036420822144, |
|
"rewards/margins": 0.9666363000869751, |
|
"rewards/rejected": -1.9476398229599, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.268354202324115, |
|
"grad_norm": 41.44581985473633, |
|
"learning_rate": 3.570369814836765e-07, |
|
"logits/chosen": -1.8446134328842163, |
|
"logits/rejected": -1.8234678506851196, |
|
"logps/chosen": -62.26701736450195, |
|
"logps/rejected": -72.74183654785156, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.065502643585205, |
|
"rewards/margins": 1.0432883501052856, |
|
"rewards/rejected": -2.1087911128997803, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.2827673182596162, |
|
"grad_norm": 33.43363952636719, |
|
"learning_rate": 3.532285323538562e-07, |
|
"logits/chosen": -1.8326313495635986, |
|
"logits/rejected": -1.8126726150512695, |
|
"logps/chosen": -58.24235916137695, |
|
"logps/rejected": -67.00035858154297, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9739583134651184, |
|
"rewards/margins": 0.7338631749153137, |
|
"rewards/rejected": -1.7078216075897217, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.2971804341951176, |
|
"grad_norm": 28.870502471923828, |
|
"learning_rate": 3.493909799586503e-07, |
|
"logits/chosen": -1.830145239830017, |
|
"logits/rejected": -1.8221817016601562, |
|
"logps/chosen": -55.99808883666992, |
|
"logps/rejected": -66.1963882446289, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8822668790817261, |
|
"rewards/margins": 0.6761519908905029, |
|
"rewards/rejected": -1.5584189891815186, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3115935501306188, |
|
"grad_norm": 37.35979461669922, |
|
"learning_rate": 3.4552540622088826e-07, |
|
"logits/chosen": -1.7925065755844116, |
|
"logits/rejected": -1.7871220111846924, |
|
"logps/chosen": -52.560211181640625, |
|
"logps/rejected": -63.6556396484375, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8926779627799988, |
|
"rewards/margins": 0.8026574850082397, |
|
"rewards/rejected": -1.6953353881835938, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.32600666606612, |
|
"grad_norm": 25.262666702270508, |
|
"learning_rate": 3.416329009634687e-07, |
|
"logits/chosen": -1.76953125, |
|
"logits/rejected": -1.7501062154769897, |
|
"logps/chosen": -55.17573928833008, |
|
"logps/rejected": -65.00733184814453, |
|
"loss": 0.4877, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9110902547836304, |
|
"rewards/margins": 0.8462222218513489, |
|
"rewards/rejected": -1.757312536239624, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.3404197820016215, |
|
"grad_norm": 32.45104217529297, |
|
"learning_rate": 3.377145616021055e-07, |
|
"logits/chosen": -1.8046982288360596, |
|
"logits/rejected": -1.7909055948257446, |
|
"logps/chosen": -57.70347213745117, |
|
"logps/rejected": -69.15714263916016, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -1.0629938840866089, |
|
"rewards/margins": 0.8470731973648071, |
|
"rewards/rejected": -1.9100669622421265, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.354832897937123, |
|
"grad_norm": 39.708580017089844, |
|
"learning_rate": 3.337714928359326e-07, |
|
"logits/chosen": -1.7561019659042358, |
|
"logits/rejected": -1.7309824228286743, |
|
"logps/chosen": -56.85551834106445, |
|
"logps/rejected": -67.09632873535156, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -1.0887633562088013, |
|
"rewards/margins": 0.8313824534416199, |
|
"rewards/rejected": -1.9201457500457764, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.3692460138726241, |
|
"grad_norm": 27.966632843017578, |
|
"learning_rate": 3.2980480633605616e-07, |
|
"logits/chosen": -1.7208874225616455, |
|
"logits/rejected": -1.7174994945526123, |
|
"logps/chosen": -57.519561767578125, |
|
"logps/rejected": -69.14530944824219, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1282824277877808, |
|
"rewards/margins": 0.8654249906539917, |
|
"rewards/rejected": -1.993707299232483, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.3836591298081253, |
|
"grad_norm": 26.737092971801758, |
|
"learning_rate": 3.2581562043214015e-07, |
|
"logits/chosen": -1.7533900737762451, |
|
"logits/rejected": -1.7562023401260376, |
|
"logps/chosen": -54.5827751159668, |
|
"logps/rejected": -67.70366668701172, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.157460331916809, |
|
"rewards/margins": 0.9240506887435913, |
|
"rewards/rejected": -2.0815110206604004, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.3980722457436268, |
|
"grad_norm": 28.375288009643555, |
|
"learning_rate": 3.2180505979711557e-07, |
|
"logits/chosen": -1.7657930850982666, |
|
"logits/rejected": -1.7565358877182007, |
|
"logps/chosen": -59.47906494140625, |
|
"logps/rejected": -68.41764831542969, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1187207698822021, |
|
"rewards/margins": 0.7722535729408264, |
|
"rewards/rejected": -1.8909746408462524, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.412485361679128, |
|
"grad_norm": 34.44431686401367, |
|
"learning_rate": 3.1777425513010055e-07, |
|
"logits/chosen": -1.7356555461883545, |
|
"logits/rejected": -1.7229560613632202, |
|
"logps/chosen": -57.01166915893555, |
|
"logps/rejected": -69.17829895019531, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.2534024715423584, |
|
"rewards/margins": 0.9338359832763672, |
|
"rewards/rejected": -2.1872386932373047, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.4268984776146292, |
|
"grad_norm": 22.93629264831543, |
|
"learning_rate": 3.1372434283762205e-07, |
|
"logits/chosen": -1.7858479022979736, |
|
"logits/rejected": -1.777570366859436, |
|
"logps/chosen": -55.632896423339844, |
|
"logps/rejected": -67.86164855957031, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1818989515304565, |
|
"rewards/margins": 0.8865016102790833, |
|
"rewards/rejected": -2.0684006214141846, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.4413115935501306, |
|
"grad_norm": 27.421680450439453, |
|
"learning_rate": 3.0965646471322844e-07, |
|
"logits/chosen": -1.797176718711853, |
|
"logits/rejected": -1.782179594039917, |
|
"logps/chosen": -53.52863311767578, |
|
"logps/rejected": -65.38296508789062, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0196263790130615, |
|
"rewards/margins": 0.8526620864868164, |
|
"rewards/rejected": -1.8722883462905884, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.455724709485632, |
|
"grad_norm": 28.67449951171875, |
|
"learning_rate": 3.055717676155827e-07, |
|
"logits/chosen": -1.7956054210662842, |
|
"logits/rejected": -1.782934546470642, |
|
"logps/chosen": -54.95515060424805, |
|
"logps/rejected": -64.83052825927734, |
|
"loss": 0.52, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9566730260848999, |
|
"rewards/margins": 0.7631611227989197, |
|
"rewards/rejected": -1.7198339700698853, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.4701378254211332, |
|
"grad_norm": 29.06233024597168, |
|
"learning_rate": 3.0147140314512853e-07, |
|
"logits/chosen": -1.8102481365203857, |
|
"logits/rejected": -1.806222915649414, |
|
"logps/chosen": -58.04301071166992, |
|
"logps/rejected": -70.16658020019531, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.9563525915145874, |
|
"rewards/margins": 0.8730182647705078, |
|
"rewards/rejected": -1.8293708562850952, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.4845509413566345, |
|
"grad_norm": 19.513904571533203, |
|
"learning_rate": 2.973565273194188e-07, |
|
"logits/chosen": -1.7972570657730103, |
|
"logits/rejected": -1.7693393230438232, |
|
"logps/chosen": -58.28984451293945, |
|
"logps/rejected": -66.8588638305664, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.035782814025879, |
|
"rewards/margins": 0.8242384195327759, |
|
"rewards/rejected": -1.8600209951400757, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.4989640572921359, |
|
"grad_norm": 29.952312469482422, |
|
"learning_rate": 2.932283002471991e-07, |
|
"logits/chosen": -1.8092374801635742, |
|
"logits/rejected": -1.7851841449737549, |
|
"logps/chosen": -60.324623107910156, |
|
"logps/rejected": -69.95586395263672, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.007617712020874, |
|
"rewards/margins": 0.7622562646865845, |
|
"rewards/rejected": -1.7698739767074585, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.513377173227637, |
|
"grad_norm": 25.340177536010742, |
|
"learning_rate": 2.89087885801338e-07, |
|
"logits/chosen": -1.8064777851104736, |
|
"logits/rejected": -1.806052565574646, |
|
"logps/chosen": -51.601837158203125, |
|
"logps/rejected": -61.78096389770508, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.838117778301239, |
|
"rewards/margins": 0.8140629529953003, |
|
"rewards/rejected": -1.6521809101104736, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.5277902891631383, |
|
"grad_norm": 37.2698860168457, |
|
"learning_rate": 2.8493645129069535e-07, |
|
"logits/chosen": -1.7569599151611328, |
|
"logits/rejected": -1.730499029159546, |
|
"logps/chosen": -56.78059005737305, |
|
"logps/rejected": -66.43777465820312, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8391516804695129, |
|
"rewards/margins": 0.7788572311401367, |
|
"rewards/rejected": -1.6180089712142944, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.5422034050986397, |
|
"grad_norm": 31.261423110961914, |
|
"learning_rate": 2.807751671310231e-07, |
|
"logits/chosen": -1.7782785892486572, |
|
"logits/rejected": -1.7672075033187866, |
|
"logps/chosen": -61.70476150512695, |
|
"logps/rejected": -71.72880554199219, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.9828069806098938, |
|
"rewards/margins": 0.8402408361434937, |
|
"rewards/rejected": -1.8230478763580322, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.5566165210341412, |
|
"grad_norm": 27.57630157470703, |
|
"learning_rate": 2.7660520651498853e-07, |
|
"logits/chosen": -1.778074026107788, |
|
"logits/rejected": -1.7654485702514648, |
|
"logps/chosen": -61.545814514160156, |
|
"logps/rejected": -71.43892669677734, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9759114384651184, |
|
"rewards/margins": 0.8692989349365234, |
|
"rewards/rejected": -1.845210313796997, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.5710296369696424, |
|
"grad_norm": 35.753257751464844, |
|
"learning_rate": 2.7242774508141663e-07, |
|
"logits/chosen": -1.7737147808074951, |
|
"logits/rejected": -1.7685718536376953, |
|
"logps/chosen": -58.334930419921875, |
|
"logps/rejected": -69.20314025878906, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.9689555168151855, |
|
"rewards/margins": 0.7592560648918152, |
|
"rewards/rejected": -1.7282116413116455, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.5854427529051436, |
|
"grad_norm": 29.153667449951172, |
|
"learning_rate": 2.682439605838408e-07, |
|
"logits/chosen": -1.8176618814468384, |
|
"logits/rejected": -1.812830924987793, |
|
"logps/chosen": -57.24834442138672, |
|
"logps/rejected": -68.6217041015625, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.8450204730033875, |
|
"rewards/margins": 0.7896521687507629, |
|
"rewards/rejected": -1.6346725225448608, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.599855868840645, |
|
"grad_norm": 31.53214454650879, |
|
"learning_rate": 2.6405503255845875e-07, |
|
"logits/chosen": -1.82248055934906, |
|
"logits/rejected": -1.814016580581665, |
|
"logps/chosen": -59.1510124206543, |
|
"logps/rejected": -70.27528381347656, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7771763801574707, |
|
"rewards/margins": 0.8746312856674194, |
|
"rewards/rejected": -1.6518075466156006, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.6142689847761464, |
|
"grad_norm": 32.77677536010742, |
|
"learning_rate": 2.598621419915853e-07, |
|
"logits/chosen": -1.757372260093689, |
|
"logits/rejected": -1.7502870559692383, |
|
"logps/chosen": -53.64165496826172, |
|
"logps/rejected": -66.46639251708984, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.850943922996521, |
|
"rewards/margins": 0.9450072050094604, |
|
"rewards/rejected": -1.795951247215271, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.6286821007116477, |
|
"grad_norm": 45.913150787353516, |
|
"learning_rate": 2.5566647098669636e-07, |
|
"logits/chosen": -1.8059905767440796, |
|
"logits/rejected": -1.7845344543457031, |
|
"logps/chosen": -55.63075637817383, |
|
"logps/rejected": -65.79679870605469, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.853289008140564, |
|
"rewards/margins": 0.7583447098731995, |
|
"rewards/rejected": -1.6116336584091187, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.6430952166471489, |
|
"grad_norm": 32.05988311767578, |
|
"learning_rate": 2.5146920243115764e-07, |
|
"logits/chosen": -1.7860431671142578, |
|
"logits/rejected": -1.7688003778457642, |
|
"logps/chosen": -56.956817626953125, |
|
"logps/rejected": -65.7645263671875, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.8115145564079285, |
|
"rewards/margins": 0.6689954996109009, |
|
"rewards/rejected": -1.4805099964141846, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.6575083325826503, |
|
"grad_norm": 25.664705276489258, |
|
"learning_rate": 2.4727151966273337e-07, |
|
"logits/chosen": -1.7770273685455322, |
|
"logits/rejected": -1.7563207149505615, |
|
"logps/chosen": -55.638404846191406, |
|
"logps/rejected": -62.886566162109375, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.8054786920547485, |
|
"rewards/margins": 0.7769336104393005, |
|
"rewards/rejected": -1.5824123620986938, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.6719214485181515, |
|
"grad_norm": 28.833738327026367, |
|
"learning_rate": 2.4307460613596694e-07, |
|
"logits/chosen": -1.8111257553100586, |
|
"logits/rejected": -1.7967822551727295, |
|
"logps/chosen": -57.33094024658203, |
|
"logps/rejected": -67.30760192871094, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.799383819103241, |
|
"rewards/margins": 0.8343574404716492, |
|
"rewards/rejected": -1.6337411403656006, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.6863345644536527, |
|
"grad_norm": 21.161962509155273, |
|
"learning_rate": 2.388796450885288e-07, |
|
"logits/chosen": -1.8059934377670288, |
|
"logits/rejected": -1.798081398010254, |
|
"logps/chosen": -50.3988151550293, |
|
"logps/rejected": -62.55088424682617, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.8187153935432434, |
|
"rewards/margins": 0.9717043042182922, |
|
"rewards/rejected": -1.7904198169708252, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.7007476803891541, |
|
"grad_norm": 33.9837760925293, |
|
"learning_rate": 2.3468781920762646e-07, |
|
"logits/chosen": -1.7467705011367798, |
|
"logits/rejected": -1.7256418466567993, |
|
"logps/chosen": -58.491493225097656, |
|
"logps/rejected": -70.8143310546875, |
|
"loss": 0.4539, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.9473183751106262, |
|
"rewards/margins": 1.0221917629241943, |
|
"rewards/rejected": -1.9695100784301758, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.7151607963246556, |
|
"grad_norm": 25.674482345581055, |
|
"learning_rate": 2.3050031029656825e-07, |
|
"logits/chosen": -1.800931692123413, |
|
"logits/rejected": -1.7879711389541626, |
|
"logps/chosen": -60.068443298339844, |
|
"logps/rejected": -69.20748138427734, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9745752215385437, |
|
"rewards/margins": 0.8433617353439331, |
|
"rewards/rejected": -1.817936897277832, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.7295739122601568, |
|
"grad_norm": 32.89895248413086, |
|
"learning_rate": 2.2631829894157754e-07, |
|
"logits/chosen": -1.76886785030365, |
|
"logits/rejected": -1.754063606262207, |
|
"logps/chosen": -58.20952224731445, |
|
"logps/rejected": -69.31002044677734, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.061232328414917, |
|
"rewards/margins": 0.8504399061203003, |
|
"rewards/rejected": -1.9116723537445068, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.743987028195658, |
|
"grad_norm": 30.052446365356445, |
|
"learning_rate": 2.2214296417894906e-07, |
|
"logits/chosen": -1.7634525299072266, |
|
"logits/rejected": -1.7487919330596924, |
|
"logps/chosen": -55.20353317260742, |
|
"logps/rejected": -65.99549865722656, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.9935828447341919, |
|
"rewards/margins": 0.8899686932563782, |
|
"rewards/rejected": -1.8835513591766357, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.7584001441311594, |
|
"grad_norm": 31.799606323242188, |
|
"learning_rate": 2.1797548316264319e-07, |
|
"logits/chosen": -1.7502672672271729, |
|
"logits/rejected": -1.740473747253418, |
|
"logps/chosen": -56.68674850463867, |
|
"logps/rejected": -67.84220123291016, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0200823545455933, |
|
"rewards/margins": 0.803787887096405, |
|
"rewards/rejected": -1.823870062828064, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.7728132600666606, |
|
"grad_norm": 32.939762115478516, |
|
"learning_rate": 2.1381703083240987e-07, |
|
"logits/chosen": -1.810317039489746, |
|
"logits/rejected": -1.7904523611068726, |
|
"logps/chosen": -56.20463943481445, |
|
"logps/rejected": -65.8335189819336, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9756546020507812, |
|
"rewards/margins": 0.9413552284240723, |
|
"rewards/rejected": -1.917009711265564, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.7872263760021618, |
|
"grad_norm": 26.08432388305664, |
|
"learning_rate": 2.0966877958253787e-07, |
|
"logits/chosen": -1.8199723958969116, |
|
"logits/rejected": -1.801325798034668, |
|
"logps/chosen": -61.7148323059082, |
|
"logps/rejected": -71.31344604492188, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -1.037131667137146, |
|
"rewards/margins": 0.8753975033760071, |
|
"rewards/rejected": -1.9125293493270874, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.8016394919376633, |
|
"grad_norm": 32.24626922607422, |
|
"learning_rate": 2.0553189893132076e-07, |
|
"logits/chosen": -1.7616393566131592, |
|
"logits/rejected": -1.7485902309417725, |
|
"logps/chosen": -53.9367561340332, |
|
"logps/rejected": -66.37442016601562, |
|
"loss": 0.4515, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9454119801521301, |
|
"rewards/margins": 0.9221888780593872, |
|
"rewards/rejected": -1.8676010370254517, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.8160526078731647, |
|
"grad_norm": 23.929574966430664, |
|
"learning_rate": 2.0140755519133434e-07, |
|
"logits/chosen": -1.71337890625, |
|
"logits/rejected": -1.7015049457550049, |
|
"logps/chosen": -59.80029296875, |
|
"logps/rejected": -70.41039276123047, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -1.0442065000534058, |
|
"rewards/margins": 0.8991094827651978, |
|
"rewards/rejected": -1.943315863609314, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.830465723808666, |
|
"grad_norm": 25.99103355407715, |
|
"learning_rate": 1.9729691114061736e-07, |
|
"logits/chosen": -1.7196881771087646, |
|
"logits/rejected": -1.7151161432266235, |
|
"logps/chosen": -53.206695556640625, |
|
"logps/rejected": -67.19075012207031, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.0664688348770142, |
|
"rewards/margins": 1.1020677089691162, |
|
"rewards/rejected": -2.16853666305542, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.844878839744167, |
|
"grad_norm": 32.41804122924805, |
|
"learning_rate": 1.9320112569484946e-07, |
|
"logits/chosen": -1.7552703619003296, |
|
"logits/rejected": -1.7278478145599365, |
|
"logps/chosen": -55.47632598876953, |
|
"logps/rejected": -64.41392517089844, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.140154480934143, |
|
"rewards/margins": 0.7451371550559998, |
|
"rewards/rejected": -1.8852916955947876, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.8592919556796685, |
|
"grad_norm": 32.51744079589844, |
|
"learning_rate": 1.8912135358061694e-07, |
|
"logits/chosen": -1.796501874923706, |
|
"logits/rejected": -1.779625654220581, |
|
"logps/chosen": -58.72721481323242, |
|
"logps/rejected": -69.07386779785156, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0752990245819092, |
|
"rewards/margins": 0.8940299153327942, |
|
"rewards/rejected": -1.9693288803100586, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.87370507161517, |
|
"grad_norm": 29.42111587524414, |
|
"learning_rate": 1.8505874500986088e-07, |
|
"logits/chosen": -1.78672194480896, |
|
"logits/rejected": -1.7740051746368408, |
|
"logps/chosen": -53.470664978027344, |
|
"logps/rejected": -64.02452087402344, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -1.0004963874816895, |
|
"rewards/margins": 0.8680494427680969, |
|
"rewards/rejected": -1.8685458898544312, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.888118187550671, |
|
"grad_norm": 36.914894104003906, |
|
"learning_rate": 1.8101444535559656e-07, |
|
"logits/chosen": -1.7614473104476929, |
|
"logits/rejected": -1.7427914142608643, |
|
"logps/chosen": -55.6743049621582, |
|
"logps/rejected": -66.42680358886719, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.0334885120391846, |
|
"rewards/margins": 0.9464027285575867, |
|
"rewards/rejected": -1.9798911809921265, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.9025313034861724, |
|
"grad_norm": 32.4942512512207, |
|
"learning_rate": 1.769895948289989e-07, |
|
"logits/chosen": -1.8014084100723267, |
|
"logits/rejected": -1.7888679504394531, |
|
"logps/chosen": -62.43854904174805, |
|
"logps/rejected": -71.75282287597656, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.093829870223999, |
|
"rewards/margins": 0.8591756820678711, |
|
"rewards/rejected": -1.9530055522918701, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.9169444194216738, |
|
"grad_norm": 35.89755630493164, |
|
"learning_rate": 1.729853281579414e-07, |
|
"logits/chosen": -1.7554981708526611, |
|
"logits/rejected": -1.7466237545013428, |
|
"logps/chosen": -62.69568634033203, |
|
"logps/rejected": -75.16825866699219, |
|
"loss": 0.4606, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -1.1235264539718628, |
|
"rewards/margins": 0.9610961079597473, |
|
"rewards/rejected": -2.084622383117676, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.931357535357175, |
|
"grad_norm": 23.533727645874023, |
|
"learning_rate": 1.6900277426708222e-07, |
|
"logits/chosen": -1.7824815511703491, |
|
"logits/rejected": -1.7752597332000732, |
|
"logps/chosen": -59.05015182495117, |
|
"logps/rejected": -70.8111343383789, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -1.1483525037765503, |
|
"rewards/margins": 0.8362733125686646, |
|
"rewards/rejected": -1.9846255779266357, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.9457706512926762, |
|
"grad_norm": 43.141048431396484, |
|
"learning_rate": 1.650430559595859e-07, |
|
"logits/chosen": -1.7303861379623413, |
|
"logits/rejected": -1.7137393951416016, |
|
"logps/chosen": -62.492454528808594, |
|
"logps/rejected": -71.88333129882812, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2485052347183228, |
|
"rewards/margins": 0.8395478129386902, |
|
"rewards/rejected": -2.088052749633789, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.9601837672281777, |
|
"grad_norm": 34.501712799072266, |
|
"learning_rate": 1.6110728960057106e-07, |
|
"logits/chosen": -1.7456886768341064, |
|
"logits/rejected": -1.7302719354629517, |
|
"logps/chosen": -56.20106887817383, |
|
"logps/rejected": -67.40886688232422, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.1765556335449219, |
|
"rewards/margins": 0.8575465083122253, |
|
"rewards/rejected": -2.034101963043213, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.974596883163679, |
|
"grad_norm": 44.137115478515625, |
|
"learning_rate": 1.5719658480237269e-07, |
|
"logits/chosen": -1.7316787242889404, |
|
"logits/rejected": -1.7204573154449463, |
|
"logps/chosen": -57.36134719848633, |
|
"logps/rejected": -68.31610870361328, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3111565113067627, |
|
"rewards/margins": 0.8917368054389954, |
|
"rewards/rejected": -2.2028934955596924, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.9890099990991803, |
|
"grad_norm": 32.499935150146484, |
|
"learning_rate": 1.5331204411170932e-07, |
|
"logits/chosen": -1.7521297931671143, |
|
"logits/rejected": -1.7524783611297607, |
|
"logps/chosen": -61.551841735839844, |
|
"logps/rejected": -73.5947265625, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -1.2590062618255615, |
|
"rewards/margins": 0.8878059387207031, |
|
"rewards/rejected": -2.1468119621276855, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.0034231150346815, |
|
"grad_norm": 35.52438735961914, |
|
"learning_rate": 1.494547626988408e-07, |
|
"logits/chosen": -1.7367178201675415, |
|
"logits/rejected": -1.7304236888885498, |
|
"logps/chosen": -59.0101203918457, |
|
"logps/rejected": -69.7065658569336, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -1.156428575515747, |
|
"rewards/margins": 0.8491467237472534, |
|
"rewards/rejected": -2.005575656890869, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.017836230970183, |
|
"grad_norm": 28.009803771972656, |
|
"learning_rate": 1.456258280488073e-07, |
|
"logits/chosen": -1.691402792930603, |
|
"logits/rejected": -1.6789098978042603, |
|
"logps/chosen": -55.73322677612305, |
|
"logps/rejected": -70.45665740966797, |
|
"loss": 0.4085, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.081807255744934, |
|
"rewards/margins": 1.1556535959243774, |
|
"rewards/rejected": -2.2374606132507324, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0322493469056844, |
|
"grad_norm": 27.49490737915039, |
|
"learning_rate": 1.4182631965483305e-07, |
|
"logits/chosen": -1.7311683893203735, |
|
"logits/rejected": -1.726595163345337, |
|
"logps/chosen": -58.620765686035156, |
|
"logps/rejected": -72.42903137207031, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.149526834487915, |
|
"rewards/margins": 1.0777475833892822, |
|
"rewards/rejected": -2.2272744178771973, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.0466624628411854, |
|
"grad_norm": 30.135372161865234, |
|
"learning_rate": 1.3805730871398584e-07, |
|
"logits/chosen": -1.6934964656829834, |
|
"logits/rejected": -1.677056908607483, |
|
"logps/chosen": -53.71419143676758, |
|
"logps/rejected": -67.37553405761719, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.1637048721313477, |
|
"rewards/margins": 1.193371057510376, |
|
"rewards/rejected": -2.3570759296417236, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.061075578776687, |
|
"grad_norm": 26.996715545654297, |
|
"learning_rate": 1.3431985782517226e-07, |
|
"logits/chosen": -1.6753069162368774, |
|
"logits/rejected": -1.65777587890625, |
|
"logps/chosen": -57.29792404174805, |
|
"logps/rejected": -70.36094665527344, |
|
"loss": 0.4289, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.2193354368209839, |
|
"rewards/margins": 1.0907495021820068, |
|
"rewards/rejected": -2.310084819793701, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.075488694712188, |
|
"grad_norm": 30.4396915435791, |
|
"learning_rate": 1.3061502068956042e-07, |
|
"logits/chosen": -1.6357700824737549, |
|
"logits/rejected": -1.6307754516601562, |
|
"logps/chosen": -56.32429885864258, |
|
"logps/rejected": -70.00544738769531, |
|
"loss": 0.3999, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -1.1628742218017578, |
|
"rewards/margins": 1.1366623640060425, |
|
"rewards/rejected": -2.2995364665985107, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.089901810647689, |
|
"grad_norm": 33.75346374511719, |
|
"learning_rate": 1.2694384181350792e-07, |
|
"logits/chosen": -1.6029478311538696, |
|
"logits/rejected": -1.5960829257965088, |
|
"logps/chosen": -62.79957962036133, |
|
"logps/rejected": -76.08135223388672, |
|
"loss": 0.4139, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.2355473041534424, |
|
"rewards/margins": 1.1187636852264404, |
|
"rewards/rejected": -2.3543107509613037, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.1043149265831906, |
|
"grad_norm": 23.287256240844727, |
|
"learning_rate": 1.2330735621408514e-07, |
|
"logits/chosen": -1.6741046905517578, |
|
"logits/rejected": -1.6551471948623657, |
|
"logps/chosen": -58.5982780456543, |
|
"logps/rejected": -71.42253112792969, |
|
"loss": 0.3971, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2658870220184326, |
|
"rewards/margins": 1.1862525939941406, |
|
"rewards/rejected": -2.4521396160125732, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.118728042518692, |
|
"grad_norm": 24.4993896484375, |
|
"learning_rate": 1.1970658912727138e-07, |
|
"logits/chosen": -1.612346887588501, |
|
"logits/rejected": -1.5920403003692627, |
|
"logps/chosen": -59.92454147338867, |
|
"logps/rejected": -73.3915786743164, |
|
"loss": 0.4134, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.242612600326538, |
|
"rewards/margins": 1.216597557067871, |
|
"rewards/rejected": -2.459210157394409, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.1331411584541935, |
|
"grad_norm": 41.4117546081543, |
|
"learning_rate": 1.1614255571891016e-07, |
|
"logits/chosen": -1.5743157863616943, |
|
"logits/rejected": -1.5713180303573608, |
|
"logps/chosen": -61.38301467895508, |
|
"logps/rejected": -75.31806182861328, |
|
"loss": 0.4241, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4350389242172241, |
|
"rewards/margins": 1.2023990154266357, |
|
"rewards/rejected": -2.6374378204345703, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.1475542743896945, |
|
"grad_norm": 25.956771850585938, |
|
"learning_rate": 1.1261626079850295e-07, |
|
"logits/chosen": -1.5780308246612549, |
|
"logits/rejected": -1.5647315979003906, |
|
"logps/chosen": -59.95500564575195, |
|
"logps/rejected": -76.34349060058594, |
|
"loss": 0.3777, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.2126551866531372, |
|
"rewards/margins": 1.368710994720459, |
|
"rewards/rejected": -2.5813660621643066, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.161967390325196, |
|
"grad_norm": 31.3956298828125, |
|
"learning_rate": 1.0912869853592247e-07, |
|
"logits/chosen": -1.6019783020019531, |
|
"logits/rejected": -1.5898057222366333, |
|
"logps/chosen": -61.808135986328125, |
|
"logps/rejected": -77.27191925048828, |
|
"loss": 0.3935, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3138659000396729, |
|
"rewards/margins": 1.354296088218689, |
|
"rewards/rejected": -2.6681618690490723, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1763805062606973, |
|
"grad_norm": 35.722694396972656, |
|
"learning_rate": 1.056808521811268e-07, |
|
"logits/chosen": -1.561440348625183, |
|
"logits/rejected": -1.546866774559021, |
|
"logps/chosen": -57.341331481933594, |
|
"logps/rejected": -72.38762664794922, |
|
"loss": 0.4121, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2446476221084595, |
|
"rewards/margins": 1.2853491306304932, |
|
"rewards/rejected": -2.529996633529663, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.1907936221961988, |
|
"grad_norm": 38.04680252075195, |
|
"learning_rate": 1.0227369378695006e-07, |
|
"logits/chosen": -1.60148024559021, |
|
"logits/rejected": -1.5893213748931885, |
|
"logps/chosen": -63.2020378112793, |
|
"logps/rejected": -78.1455307006836, |
|
"loss": 0.3897, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.4317493438720703, |
|
"rewards/margins": 1.2837940454483032, |
|
"rewards/rejected": -2.715543270111084, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.2052067381316998, |
|
"grad_norm": 29.301164627075195, |
|
"learning_rate": 9.890818393505251e-08, |
|
"logits/chosen": -1.586211085319519, |
|
"logits/rejected": -1.5707120895385742, |
|
"logps/chosen": -65.30186462402344, |
|
"logps/rejected": -80.65480041503906, |
|
"loss": 0.3727, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -1.4216526746749878, |
|
"rewards/margins": 1.3828752040863037, |
|
"rewards/rejected": -2.804527997970581, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.219619854067201, |
|
"grad_norm": 29.37394905090332, |
|
"learning_rate": 9.55852714651019e-08, |
|
"logits/chosen": -1.5325640439987183, |
|
"logits/rejected": -1.5217316150665283, |
|
"logps/chosen": -61.10272979736328, |
|
"logps/rejected": -73.1271743774414, |
|
"loss": 0.416, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -1.4346240758895874, |
|
"rewards/margins": 1.1002185344696045, |
|
"rewards/rejected": -2.5348427295684814, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.2340329700027026, |
|
"grad_norm": 26.91488265991211, |
|
"learning_rate": 9.230589320726809e-08, |
|
"logits/chosen": -1.540071725845337, |
|
"logits/rejected": -1.5339853763580322, |
|
"logps/chosen": -57.176513671875, |
|
"logps/rejected": -70.31219482421875, |
|
"loss": 0.4096, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -1.4375228881835938, |
|
"rewards/margins": 1.2083842754364014, |
|
"rewards/rejected": -2.645906925201416, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.2484460859382036, |
|
"grad_norm": 31.109094619750977, |
|
"learning_rate": 8.907097371810085e-08, |
|
"logits/chosen": -1.5355430841445923, |
|
"logits/rejected": -1.530775785446167, |
|
"logps/chosen": -59.644500732421875, |
|
"logps/rejected": -74.54759216308594, |
|
"loss": 0.411, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -1.4771003723144531, |
|
"rewards/margins": 1.1839910745620728, |
|
"rewards/rejected": -2.6610913276672363, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.262859201873705, |
|
"grad_norm": 25.765758514404297, |
|
"learning_rate": 8.588142501987017e-08, |
|
"logits/chosen": -1.5403801202774048, |
|
"logits/rejected": -1.522972822189331, |
|
"logps/chosen": -60.12090301513672, |
|
"logps/rejected": -74.10574340820312, |
|
"loss": 0.4106, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4168094396591187, |
|
"rewards/margins": 1.23050856590271, |
|
"rewards/rejected": -2.647318124771118, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.2772723178092065, |
|
"grad_norm": 35.56646728515625, |
|
"learning_rate": 8.273814634343893e-08, |
|
"logits/chosen": -1.5780024528503418, |
|
"logits/rejected": -1.5696873664855957, |
|
"logps/chosen": -56.7363166809082, |
|
"logps/rejected": -71.40894317626953, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4435478448867798, |
|
"rewards/margins": 1.132821798324585, |
|
"rewards/rejected": -2.5763697624206543, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.291685433744708, |
|
"grad_norm": 29.11915397644043, |
|
"learning_rate": 7.96420238747425e-08, |
|
"logits/chosen": -1.547071933746338, |
|
"logits/rejected": -1.5279027223587036, |
|
"logps/chosen": -59.5390510559082, |
|
"logps/rejected": -74.87039184570312, |
|
"loss": 0.4048, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.40023934841156, |
|
"rewards/margins": 1.345477819442749, |
|
"rewards/rejected": -2.7457172870635986, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.306098549680209, |
|
"grad_norm": 32.61679458618164, |
|
"learning_rate": 7.659393050494595e-08, |
|
"logits/chosen": -1.5456218719482422, |
|
"logits/rejected": -1.5411933660507202, |
|
"logps/chosen": -65.36607360839844, |
|
"logps/rejected": -77.77400970458984, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -1.5160075426101685, |
|
"rewards/margins": 1.0541882514953613, |
|
"rewards/rejected": -2.5701956748962402, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.3205116656157103, |
|
"grad_norm": 40.657920837402344, |
|
"learning_rate": 7.35947255843494e-08, |
|
"logits/chosen": -1.5437052249908447, |
|
"logits/rejected": -1.532766580581665, |
|
"logps/chosen": -62.3788948059082, |
|
"logps/rejected": -73.6574478149414, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -1.4102076292037964, |
|
"rewards/margins": 1.109969139099121, |
|
"rewards/rejected": -2.520177125930786, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.3349247815512117, |
|
"grad_norm": 26.726566314697266, |
|
"learning_rate": 7.064525468011107e-08, |
|
"logits/chosen": -1.5550864934921265, |
|
"logits/rejected": -1.5475775003433228, |
|
"logps/chosen": -57.881187438964844, |
|
"logps/rejected": -72.7550048828125, |
|
"loss": 0.3932, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -1.3459597826004028, |
|
"rewards/margins": 1.2489373683929443, |
|
"rewards/rejected": -2.5948970317840576, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.3493378974867127, |
|
"grad_norm": 26.51803970336914, |
|
"learning_rate": 6.774634933785611e-08, |
|
"logits/chosen": -1.5511729717254639, |
|
"logits/rejected": -1.5490328073501587, |
|
"logps/chosen": -63.644187927246094, |
|
"logps/rejected": -77.2373046875, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.3264816999435425, |
|
"rewards/margins": 1.1635551452636719, |
|
"rewards/rejected": -2.490036725997925, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.363751013422214, |
|
"grad_norm": 37.48248291015625, |
|
"learning_rate": 6.489882684723872e-08, |
|
"logits/chosen": -1.6325349807739258, |
|
"logits/rejected": -1.611288070678711, |
|
"logps/chosen": -59.67763137817383, |
|
"logps/rejected": -74.39009857177734, |
|
"loss": 0.407, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.3188283443450928, |
|
"rewards/margins": 1.2371572256088257, |
|
"rewards/rejected": -2.555985927581787, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.3781641293577156, |
|
"grad_norm": 33.88801956176758, |
|
"learning_rate": 6.210349001152304e-08, |
|
"logits/chosen": -1.56001877784729, |
|
"logits/rejected": -1.545484185218811, |
|
"logps/chosen": -54.10548782348633, |
|
"logps/rejected": -68.94291687011719, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2859638929367065, |
|
"rewards/margins": 1.1985052824020386, |
|
"rewards/rejected": -2.484469175338745, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.392577245293217, |
|
"grad_norm": 27.400054931640625, |
|
"learning_rate": 5.936112692124884e-08, |
|
"logits/chosen": -1.5780709981918335, |
|
"logits/rejected": -1.5682373046875, |
|
"logps/chosen": -61.6357421875, |
|
"logps/rejected": -77.30577850341797, |
|
"loss": 0.407, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -1.3840547800064087, |
|
"rewards/margins": 1.2264368534088135, |
|
"rewards/rejected": -2.6104912757873535, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.406990361228718, |
|
"grad_norm": 38.04377746582031, |
|
"learning_rate": 5.66725107320444e-08, |
|
"logits/chosen": -1.546272873878479, |
|
"logits/rejected": -1.5431301593780518, |
|
"logps/chosen": -62.60614776611328, |
|
"logps/rejected": -76.22834014892578, |
|
"loss": 0.4048, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.454508900642395, |
|
"rewards/margins": 1.2125307321548462, |
|
"rewards/rejected": -2.667039394378662, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.4214034771642194, |
|
"grad_norm": 34.18000411987305, |
|
"learning_rate": 5.403839944665081e-08, |
|
"logits/chosen": -1.617413878440857, |
|
"logits/rejected": -1.618520736694336, |
|
"logps/chosen": -62.6517333984375, |
|
"logps/rejected": -76.1615982055664, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -1.4979777336120605, |
|
"rewards/margins": 1.052832007408142, |
|
"rewards/rejected": -2.550809860229492, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.435816593099721, |
|
"grad_norm": 25.391855239868164, |
|
"learning_rate": 5.1459535701217694e-08, |
|
"logits/chosen": -1.5523961782455444, |
|
"logits/rejected": -1.539656400680542, |
|
"logps/chosen": -59.02225875854492, |
|
"logps/rejected": -77.31755065917969, |
|
"loss": 0.3576, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.3841768503189087, |
|
"rewards/margins": 1.4941140413284302, |
|
"rewards/rejected": -2.8782906532287598, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.450229709035222, |
|
"grad_norm": 27.22433090209961, |
|
"learning_rate": 4.8936646555931245e-08, |
|
"logits/chosen": -1.5299670696258545, |
|
"logits/rejected": -1.5135704278945923, |
|
"logps/chosen": -59.751251220703125, |
|
"logps/rejected": -74.75291442871094, |
|
"loss": 0.3773, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -1.446244239807129, |
|
"rewards/margins": 1.4083815813064575, |
|
"rewards/rejected": -2.854625701904297, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4646428249707233, |
|
"grad_norm": 27.11460304260254, |
|
"learning_rate": 4.647044329003458e-08, |
|
"logits/chosen": -1.5377050638198853, |
|
"logits/rejected": -1.5185314416885376, |
|
"logps/chosen": -62.594635009765625, |
|
"logps/rejected": -76.7301254272461, |
|
"loss": 0.4388, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.5232594013214111, |
|
"rewards/margins": 1.201012372970581, |
|
"rewards/rejected": -2.7242720127105713, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.4790559409062247, |
|
"grad_norm": 26.611129760742188, |
|
"learning_rate": 4.406162120129548e-08, |
|
"logits/chosen": -1.5484386682510376, |
|
"logits/rejected": -1.5278335809707642, |
|
"logps/chosen": -63.33378982543945, |
|
"logps/rejected": -78.29924774169922, |
|
"loss": 0.4024, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.523051142692566, |
|
"rewards/margins": 1.292851209640503, |
|
"rewards/rejected": -2.8159022331237793, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.493469056841726, |
|
"grad_norm": 27.76645851135254, |
|
"learning_rate": 4.171085940998176e-08, |
|
"logits/chosen": -1.5417293310165405, |
|
"logits/rejected": -1.531988501548767, |
|
"logps/chosen": -62.10388946533203, |
|
"logps/rejected": -76.06646728515625, |
|
"loss": 0.388, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -1.474600076675415, |
|
"rewards/margins": 1.2824805974960327, |
|
"rewards/rejected": -2.757080554962158, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.507882172777227, |
|
"grad_norm": 34.98466110229492, |
|
"learning_rate": 3.941882066739569e-08, |
|
"logits/chosen": -1.5331491231918335, |
|
"logits/rejected": -1.5120114088058472, |
|
"logps/chosen": -59.76226806640625, |
|
"logps/rejected": -75.82611083984375, |
|
"loss": 0.3892, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.515598177909851, |
|
"rewards/margins": 1.3365103006362915, |
|
"rewards/rejected": -2.8521084785461426, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.5222952887127286, |
|
"grad_norm": 38.75511169433594, |
|
"learning_rate": 3.71861511690251e-08, |
|
"logits/chosen": -1.4918270111083984, |
|
"logits/rejected": -1.4848248958587646, |
|
"logps/chosen": -62.17346954345703, |
|
"logps/rejected": -76.3984375, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.542625069618225, |
|
"rewards/margins": 1.2679589986801147, |
|
"rewards/rejected": -2.81058406829834, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.53670840464823, |
|
"grad_norm": 39.63160705566406, |
|
"learning_rate": 3.5013480372360373e-08, |
|
"logits/chosen": -1.5405272245407104, |
|
"logits/rejected": -1.5281922817230225, |
|
"logps/chosen": -65.55313873291016, |
|
"logps/rejected": -79.51661682128906, |
|
"loss": 0.3812, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.5600948333740234, |
|
"rewards/margins": 1.3620662689208984, |
|
"rewards/rejected": -2.922161102294922, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.551121520583731, |
|
"grad_norm": 35.51730728149414, |
|
"learning_rate": 3.290142081943184e-08, |
|
"logits/chosen": -1.5395221710205078, |
|
"logits/rejected": -1.5303739309310913, |
|
"logps/chosen": -66.13626098632812, |
|
"logps/rejected": -80.83757019042969, |
|
"loss": 0.3976, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.5481364727020264, |
|
"rewards/margins": 1.3121622800827026, |
|
"rewards/rejected": -2.8602986335754395, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.5655346365192324, |
|
"grad_norm": 32.397037506103516, |
|
"learning_rate": 3.085056796411528e-08, |
|
"logits/chosen": -1.4974429607391357, |
|
"logits/rejected": -1.4794594049453735, |
|
"logps/chosen": -67.4955825805664, |
|
"logps/rejected": -81.5915298461914, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -1.6371116638183594, |
|
"rewards/margins": 1.2419166564941406, |
|
"rewards/rejected": -2.879027843475342, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.579947752454734, |
|
"grad_norm": 30.48466682434082, |
|
"learning_rate": 2.8861500004255328e-08, |
|
"logits/chosen": -1.5640184879302979, |
|
"logits/rejected": -1.5465617179870605, |
|
"logps/chosen": -62.553428649902344, |
|
"logps/rejected": -75.51476287841797, |
|
"loss": 0.4137, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.5564630031585693, |
|
"rewards/margins": 1.250534176826477, |
|
"rewards/rejected": -2.806997299194336, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.5943608683902353, |
|
"grad_norm": 33.36497497558594, |
|
"learning_rate": 2.6934777718653988e-08, |
|
"logits/chosen": -1.5691068172454834, |
|
"logits/rejected": -1.550433874130249, |
|
"logps/chosen": -64.53910827636719, |
|
"logps/rejected": -80.43321228027344, |
|
"loss": 0.3994, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.5547971725463867, |
|
"rewards/margins": 1.2979726791381836, |
|
"rewards/rejected": -2.8527698516845703, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6087739843257363, |
|
"grad_norm": 22.623056411743164, |
|
"learning_rate": 2.507094430897e-08, |
|
"logits/chosen": -1.5123775005340576, |
|
"logits/rejected": -1.4988584518432617, |
|
"logps/chosen": -62.6684684753418, |
|
"logps/rejected": -78.53128051757812, |
|
"loss": 0.3838, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5202034711837769, |
|
"rewards/margins": 1.3458402156829834, |
|
"rewards/rejected": -2.8660435676574707, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.6231871002612377, |
|
"grad_norm": 27.248733520507812, |
|
"learning_rate": 2.3270525246573717e-08, |
|
"logits/chosen": -1.5637412071228027, |
|
"logits/rejected": -1.5629525184631348, |
|
"logps/chosen": -63.67070770263672, |
|
"logps/rejected": -78.90650939941406, |
|
"loss": 0.3855, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -1.4608145952224731, |
|
"rewards/margins": 1.333916187286377, |
|
"rewards/rejected": -2.7947306632995605, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.637600216196739, |
|
"grad_norm": 44.09688186645508, |
|
"learning_rate": 2.153402812440075e-08, |
|
"logits/chosen": -1.5495104789733887, |
|
"logits/rejected": -1.5366135835647583, |
|
"logps/chosen": -62.17615509033203, |
|
"logps/rejected": -77.5916748046875, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -1.4907768964767456, |
|
"rewards/margins": 1.2403948307037354, |
|
"rewards/rejected": -2.7311718463897705, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.65201333213224, |
|
"grad_norm": 39.35377883911133, |
|
"learning_rate": 1.9861942513846126e-08, |
|
"logits/chosen": -1.5815684795379639, |
|
"logits/rejected": -1.570434808731079, |
|
"logps/chosen": -68.02040100097656, |
|
"logps/rejected": -82.66510009765625, |
|
"loss": 0.4036, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.595801830291748, |
|
"rewards/margins": 1.288498878479004, |
|
"rewards/rejected": -2.884300708770752, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.6664264480677415, |
|
"grad_norm": 35.275421142578125, |
|
"learning_rate": 1.8254739826739087e-08, |
|
"logits/chosen": -1.5678503513336182, |
|
"logits/rejected": -1.5563210248947144, |
|
"logps/chosen": -64.60910034179688, |
|
"logps/rejected": -81.83099365234375, |
|
"loss": 0.3813, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.5075803995132446, |
|
"rewards/margins": 1.4048585891723633, |
|
"rewards/rejected": -2.9124391078948975, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.680839564003243, |
|
"grad_norm": 36.913028717041016, |
|
"learning_rate": 1.6712873182437915e-08, |
|
"logits/chosen": -1.6048858165740967, |
|
"logits/rejected": -1.5977307558059692, |
|
"logps/chosen": -62.97712326049805, |
|
"logps/rejected": -77.24687194824219, |
|
"loss": 0.406, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5249515771865845, |
|
"rewards/margins": 1.1899343729019165, |
|
"rewards/rejected": -2.714885711669922, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.6952526799387444, |
|
"grad_norm": 30.57379150390625, |
|
"learning_rate": 1.5236777280081603e-08, |
|
"logits/chosen": -1.545700192451477, |
|
"logits/rejected": -1.527111291885376, |
|
"logps/chosen": -60.37244415283203, |
|
"logps/rejected": -75.10139465332031, |
|
"loss": 0.4116, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4861081838607788, |
|
"rewards/margins": 1.261631965637207, |
|
"rewards/rejected": -2.7477405071258545, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.709665795874246, |
|
"grad_norm": 34.18118667602539, |
|
"learning_rate": 1.3826868276035103e-08, |
|
"logits/chosen": -1.5901832580566406, |
|
"logits/rejected": -1.5766841173171997, |
|
"logps/chosen": -66.63416290283203, |
|
"logps/rejected": -81.32105255126953, |
|
"loss": 0.3765, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.4631272554397583, |
|
"rewards/margins": 1.3422093391418457, |
|
"rewards/rejected": -2.8053364753723145, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.724078911809747, |
|
"grad_norm": 27.860313415527344, |
|
"learning_rate": 1.2483543666562097e-08, |
|
"logits/chosen": -1.5422031879425049, |
|
"logits/rejected": -1.5396907329559326, |
|
"logps/chosen": -59.41462326049805, |
|
"logps/rejected": -74.75579071044922, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.4775583744049072, |
|
"rewards/margins": 1.207562804222107, |
|
"rewards/rejected": -2.6851210594177246, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.7384920277452482, |
|
"grad_norm": 28.36042022705078, |
|
"learning_rate": 1.1207182175758585e-08, |
|
"logits/chosen": -1.548557996749878, |
|
"logits/rejected": -1.534714937210083, |
|
"logps/chosen": -59.4514045715332, |
|
"logps/rejected": -74.65626525878906, |
|
"loss": 0.4059, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3794094324111938, |
|
"rewards/margins": 1.263185739517212, |
|
"rewards/rejected": -2.642595052719116, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.7529051436807492, |
|
"grad_norm": 27.59717559814453, |
|
"learning_rate": 9.998143648779434e-09, |
|
"logits/chosen": -1.5367798805236816, |
|
"logits/rejected": -1.5305159091949463, |
|
"logps/chosen": -61.03411102294922, |
|
"logps/rejected": -76.11595153808594, |
|
"loss": 0.4241, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4926228523254395, |
|
"rewards/margins": 1.2798919677734375, |
|
"rewards/rejected": -2.772514820098877, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.7673182596162507, |
|
"grad_norm": 32.14241027832031, |
|
"learning_rate": 8.856768950386478e-09, |
|
"logits/chosen": -1.577178716659546, |
|
"logits/rejected": -1.5694324970245361, |
|
"logps/chosen": -57.841270446777344, |
|
"logps/rejected": -71.33273315429688, |
|
"loss": 0.4478, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -1.4872713088989258, |
|
"rewards/margins": 1.0947293043136597, |
|
"rewards/rejected": -2.582000255584717, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.781731375551752, |
|
"grad_norm": 29.319686889648438, |
|
"learning_rate": 7.783379868849e-09, |
|
"logits/chosen": -1.5487927198410034, |
|
"logits/rejected": -1.5374157428741455, |
|
"logps/chosen": -64.15359497070312, |
|
"logps/rejected": -78.96765899658203, |
|
"loss": 0.3825, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.4542877674102783, |
|
"rewards/margins": 1.3190295696258545, |
|
"rewards/rejected": -2.773317337036133, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.7961444914872535, |
|
"grad_norm": 34.06153869628906, |
|
"learning_rate": 6.778279025221212e-09, |
|
"logits/chosen": -1.5599983930587769, |
|
"logits/rejected": -1.5473382472991943, |
|
"logps/chosen": -62.492393493652344, |
|
"logps/rejected": -79.02606964111328, |
|
"loss": 0.3907, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.478379726409912, |
|
"rewards/margins": 1.287481665611267, |
|
"rewards/rejected": -2.7658615112304688, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.810557607422755, |
|
"grad_norm": 31.845796585083008, |
|
"learning_rate": 5.841749788024791e-09, |
|
"logits/chosen": -1.5394327640533447, |
|
"logits/rejected": -1.5315256118774414, |
|
"logps/chosen": -60.67429733276367, |
|
"logps/rejected": -74.8219223022461, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -1.486366629600525, |
|
"rewards/margins": 1.1716400384902954, |
|
"rewards/rejected": -2.6580066680908203, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.824970723358256, |
|
"grad_norm": 29.72622299194336, |
|
"learning_rate": 4.974056193358084e-09, |
|
"logits/chosen": -1.5817995071411133, |
|
"logits/rejected": -1.561623454093933, |
|
"logps/chosen": -66.63758850097656, |
|
"logps/rejected": -82.53173065185547, |
|
"loss": 0.3726, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.407747745513916, |
|
"rewards/margins": 1.4503003358840942, |
|
"rewards/rejected": -2.8580479621887207, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.8393838392937574, |
|
"grad_norm": 33.73102951049805, |
|
"learning_rate": 4.175442870456708e-09, |
|
"logits/chosen": -1.510568618774414, |
|
"logits/rejected": -1.4960753917694092, |
|
"logps/chosen": -62.57592010498047, |
|
"logps/rejected": -76.44905090332031, |
|
"loss": 0.4293, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.516035795211792, |
|
"rewards/margins": 1.1771646738052368, |
|
"rewards/rejected": -2.6932003498077393, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.8537969552292584, |
|
"grad_norm": 37.108036041259766, |
|
"learning_rate": 3.44613497272489e-09, |
|
"logits/chosen": -1.5102002620697021, |
|
"logits/rejected": -1.4990047216415405, |
|
"logps/chosen": -64.62440490722656, |
|
"logps/rejected": -77.6097412109375, |
|
"loss": 0.4306, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.4141771793365479, |
|
"rewards/margins": 1.1328486204147339, |
|
"rewards/rejected": -2.547025680541992, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.86821007116476, |
|
"grad_norm": 28.47416114807129, |
|
"learning_rate": 2.786338114258019e-09, |
|
"logits/chosen": -1.5360634326934814, |
|
"logits/rejected": -1.5190080404281616, |
|
"logps/chosen": -62.32354736328125, |
|
"logps/rejected": -79.61341094970703, |
|
"loss": 0.3866, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.382712721824646, |
|
"rewards/margins": 1.3544337749481201, |
|
"rewards/rejected": -2.7371468544006348, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.882623187100261, |
|
"grad_norm": 33.172950744628906, |
|
"learning_rate": 2.1962383118736828e-09, |
|
"logits/chosen": -1.5397237539291382, |
|
"logits/rejected": -1.5202221870422363, |
|
"logps/chosen": -61.6288948059082, |
|
"logps/rejected": -77.49903869628906, |
|
"loss": 0.3673, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -1.417991280555725, |
|
"rewards/margins": 1.4083993434906006, |
|
"rewards/rejected": -2.826390504837036, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.8970363030357626, |
|
"grad_norm": 41.858070373535156, |
|
"learning_rate": 1.6760019326678698e-09, |
|
"logits/chosen": -1.5235203504562378, |
|
"logits/rejected": -1.5131398439407349, |
|
"logps/chosen": -62.8912353515625, |
|
"logps/rejected": -75.68180847167969, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.5128400325775146, |
|
"rewards/margins": 1.1097246408462524, |
|
"rewards/rejected": -2.6225647926330566, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.911449418971264, |
|
"grad_norm": 46.39678955078125, |
|
"learning_rate": 1.2257756471110437e-09, |
|
"logits/chosen": -1.5751731395721436, |
|
"logits/rejected": -1.5582482814788818, |
|
"logps/chosen": -64.07471466064453, |
|
"logps/rejected": -77.11708068847656, |
|
"loss": 0.4344, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -1.452874779701233, |
|
"rewards/margins": 1.1758126020431519, |
|
"rewards/rejected": -2.6286873817443848, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.925862534906765, |
|
"grad_norm": 44.330745697021484, |
|
"learning_rate": 8.456863876973586e-10, |
|
"logits/chosen": -1.5380871295928955, |
|
"logits/rejected": -1.531022310256958, |
|
"logps/chosen": -61.59229278564453, |
|
"logps/rejected": -74.27851867675781, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5201137065887451, |
|
"rewards/margins": 1.1149994134902954, |
|
"rewards/rejected": -2.635113000869751, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.9402756508422665, |
|
"grad_norm": 33.99905014038086, |
|
"learning_rate": 5.358413131582861e-10, |
|
"logits/chosen": -1.5820564031600952, |
|
"logits/rejected": -1.5623215436935425, |
|
"logps/chosen": -62.37836837768555, |
|
"logps/rejected": -79.43901062011719, |
|
"loss": 0.3971, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.5787489414215088, |
|
"rewards/margins": 1.3647538423538208, |
|
"rewards/rejected": -2.943502902984619, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.954688766777768, |
|
"grad_norm": 32.752960205078125, |
|
"learning_rate": 2.963277782515872e-10, |
|
"logits/chosen": -1.581578016281128, |
|
"logits/rejected": -1.5734798908233643, |
|
"logps/chosen": -63.77600860595703, |
|
"logps/rejected": -78.80097198486328, |
|
"loss": 0.4043, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -1.5265841484069824, |
|
"rewards/margins": 1.2426446676254272, |
|
"rewards/rejected": -2.769228935241699, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.969101882713269, |
|
"grad_norm": 28.23070526123047, |
|
"learning_rate": 1.272133091331229e-10, |
|
"logits/chosen": -1.5508421659469604, |
|
"logits/rejected": -1.5296354293823242, |
|
"logps/chosen": -57.603782653808594, |
|
"logps/rejected": -73.52263641357422, |
|
"loss": 0.3884, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.363713026046753, |
|
"rewards/margins": 1.3004621267318726, |
|
"rewards/rejected": -2.664175510406494, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.9835149986487703, |
|
"grad_norm": 37.76509475708008, |
|
"learning_rate": 2.8545584319361605e-11, |
|
"logits/chosen": -1.5593769550323486, |
|
"logits/rejected": -1.5454738140106201, |
|
"logps/chosen": -59.01500701904297, |
|
"logps/rejected": -72.01415252685547, |
|
"loss": 0.4342, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4014034271240234, |
|
"rewards/margins": 1.1465706825256348, |
|
"rewards/rejected": -2.547974109649658, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.9964868029907215, |
|
"step": 2079, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5121967236028949, |
|
"train_runtime": 18760.8926, |
|
"train_samples_per_second": 3.55, |
|
"train_steps_per_second": 0.111 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2079, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|