|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.998828811243412, |
|
"eval_steps": 75, |
|
"global_step": 160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.024985360140542652, |
|
"grad_norm": 16.803013672547674, |
|
"learning_rate": 4e-09, |
|
"logits/chosen": -0.7169057726860046, |
|
"logits/rejected": -0.7742066979408264, |
|
"logps/chosen": -158.30039978027344, |
|
"logps/rejected": -167.5013427734375, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.0021153492853045464, |
|
"rewards/margins": 0.0017622699961066246, |
|
"rewards/rejected": 0.00035307969665154815, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.049970720281085304, |
|
"grad_norm": 17.42934158508575, |
|
"learning_rate": 8e-09, |
|
"logits/chosen": -0.6620150804519653, |
|
"logits/rejected": -0.7335376143455505, |
|
"logps/chosen": -166.97694396972656, |
|
"logps/rejected": -166.01077270507812, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.54296875, |
|
"rewards/chosen": 0.001203760621137917, |
|
"rewards/margins": 0.0035094027407467365, |
|
"rewards/rejected": -0.002305642468854785, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.07495608042162795, |
|
"grad_norm": 17.33795202273814, |
|
"learning_rate": 1.1999999999999998e-08, |
|
"logits/chosen": -0.7035447359085083, |
|
"logits/rejected": -0.7770529985427856, |
|
"logps/chosen": -160.94981384277344, |
|
"logps/rejected": -169.4982147216797, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0030440501868724823, |
|
"rewards/margins": -0.003373978193849325, |
|
"rewards/rejected": 0.006418028846383095, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.09994144056217061, |
|
"grad_norm": 16.8202461847326, |
|
"learning_rate": 1.6e-08, |
|
"logits/chosen": -0.6711893677711487, |
|
"logits/rejected": -0.7459686994552612, |
|
"logps/chosen": -164.15184020996094, |
|
"logps/rejected": -180.4791259765625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.49609375, |
|
"rewards/chosen": 0.0023907795548439026, |
|
"rewards/margins": -0.002676962874829769, |
|
"rewards/rejected": 0.005067741964012384, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.12492680070271325, |
|
"grad_norm": 17.3204666289138, |
|
"learning_rate": 2e-08, |
|
"logits/chosen": -0.6638763546943665, |
|
"logits/rejected": -0.7240799069404602, |
|
"logps/chosen": -165.18699645996094, |
|
"logps/rejected": -153.55906677246094, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.0023628827184438705, |
|
"rewards/margins": 0.002067561261355877, |
|
"rewards/rejected": 0.00029532192274928093, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1499121608432559, |
|
"grad_norm": 18.418398156120897, |
|
"learning_rate": 2.3999999999999997e-08, |
|
"logits/chosen": -0.7016454935073853, |
|
"logits/rejected": -0.7838542461395264, |
|
"logps/chosen": -159.6986083984375, |
|
"logps/rejected": -275.36236572265625, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -0.0019385786727070808, |
|
"rewards/margins": 0.0008969469927251339, |
|
"rewards/rejected": -0.0028355256654322147, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.17489752098379854, |
|
"grad_norm": 17.791533019243598, |
|
"learning_rate": 2.8000000000000003e-08, |
|
"logits/chosen": -0.6400444507598877, |
|
"logits/rejected": -0.7088255882263184, |
|
"logps/chosen": -159.34640502929688, |
|
"logps/rejected": -162.47824096679688, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.51953125, |
|
"rewards/chosen": 0.0004528433782979846, |
|
"rewards/margins": 0.0028284057043492794, |
|
"rewards/rejected": -0.0023755626752972603, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.19988288112434122, |
|
"grad_norm": 17.319201876452443, |
|
"learning_rate": 3.2e-08, |
|
"logits/chosen": -0.6704908609390259, |
|
"logits/rejected": -0.7314557433128357, |
|
"logps/chosen": -160.60862731933594, |
|
"logps/rejected": -166.46450805664062, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.001064170734025538, |
|
"rewards/margins": 3.689667209982872e-06, |
|
"rewards/rejected": 0.0010604818817228079, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.22486824126488386, |
|
"grad_norm": 16.072247731283092, |
|
"learning_rate": 3.6e-08, |
|
"logits/chosen": -0.6766926050186157, |
|
"logits/rejected": -0.7459310293197632, |
|
"logps/chosen": -162.13914489746094, |
|
"logps/rejected": -191.6351318359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.50390625, |
|
"rewards/chosen": 0.005069206468760967, |
|
"rewards/margins": 0.0026566418819129467, |
|
"rewards/rejected": 0.0024125645868480206, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2498536014054265, |
|
"grad_norm": 17.359137433037688, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -0.6625803709030151, |
|
"logits/rejected": -0.7203136682510376, |
|
"logps/chosen": -156.5331573486328, |
|
"logps/rejected": -222.60467529296875, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.47265625, |
|
"rewards/chosen": 0.001491243951022625, |
|
"rewards/margins": -0.0024129198864102364, |
|
"rewards/rejected": 0.0039041636046022177, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27483896154596915, |
|
"grad_norm": 17.83000320149723, |
|
"learning_rate": 4.4e-08, |
|
"logits/chosen": -0.6529428958892822, |
|
"logits/rejected": -0.7184248566627502, |
|
"logps/chosen": -161.4114990234375, |
|
"logps/rejected": -171.13998413085938, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.54296875, |
|
"rewards/chosen": 0.003907513804733753, |
|
"rewards/margins": 0.0027447110041975975, |
|
"rewards/rejected": 0.0011628026841208339, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.2998243216865118, |
|
"grad_norm": 18.22059882059828, |
|
"learning_rate": 4.799999999999999e-08, |
|
"logits/chosen": -0.6906304955482483, |
|
"logits/rejected": -0.7680624723434448, |
|
"logps/chosen": -171.14309692382812, |
|
"logps/rejected": -251.1785888671875, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.004641087260097265, |
|
"rewards/margins": 0.0015279713552445173, |
|
"rewards/rejected": 0.0031131161376833916, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32480968182705444, |
|
"grad_norm": 16.628845467432836, |
|
"learning_rate": 5.2e-08, |
|
"logits/chosen": -0.6719599962234497, |
|
"logits/rejected": -0.743903636932373, |
|
"logps/chosen": -159.7659912109375, |
|
"logps/rejected": -191.9639129638672, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004612984135746956, |
|
"rewards/margins": 0.00576308136805892, |
|
"rewards/rejected": -0.001150098629295826, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3497950419675971, |
|
"grad_norm": 17.41091157106264, |
|
"learning_rate": 5.6000000000000005e-08, |
|
"logits/chosen": -0.6741428375244141, |
|
"logits/rejected": -0.7584381699562073, |
|
"logps/chosen": -162.17498779296875, |
|
"logps/rejected": -213.79025268554688, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.55859375, |
|
"rewards/chosen": 0.011129561811685562, |
|
"rewards/margins": 0.009353543631732464, |
|
"rewards/rejected": 0.0017760168993845582, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3747804021081398, |
|
"grad_norm": 17.981525024641783, |
|
"learning_rate": 6e-08, |
|
"logits/chosen": -0.6742160320281982, |
|
"logits/rejected": -0.7543560266494751, |
|
"logps/chosen": -164.4986114501953, |
|
"logps/rejected": -202.91433715820312, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.52734375, |
|
"rewards/chosen": 0.006884717848151922, |
|
"rewards/margins": 0.0076943556778132915, |
|
"rewards/rejected": -0.000809638062492013, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.39976576224868243, |
|
"grad_norm": 17.109113741756868, |
|
"learning_rate": 6.4e-08, |
|
"logits/chosen": -0.6298938989639282, |
|
"logits/rejected": -0.7012688517570496, |
|
"logps/chosen": -160.2120361328125, |
|
"logps/rejected": -160.2590789794922, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.010141907259821892, |
|
"rewards/margins": 0.007148087956011295, |
|
"rewards/rejected": 0.00299381953664124, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.4247511223892251, |
|
"grad_norm": 17.727097440594363, |
|
"learning_rate": 6.8e-08, |
|
"logits/chosen": -0.6654178500175476, |
|
"logits/rejected": -0.7475502490997314, |
|
"logps/chosen": -174.74790954589844, |
|
"logps/rejected": -165.39630126953125, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.63671875, |
|
"rewards/chosen": 0.022659441456198692, |
|
"rewards/margins": 0.020327560603618622, |
|
"rewards/rejected": 0.002331881085410714, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.4497364825297677, |
|
"grad_norm": 16.090260229163803, |
|
"learning_rate": 7.2e-08, |
|
"logits/chosen": -0.6315876841545105, |
|
"logits/rejected": -0.7063596844673157, |
|
"logps/chosen": -158.3714141845703, |
|
"logps/rejected": -169.9803924560547, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.62109375, |
|
"rewards/chosen": 0.02803650312125683, |
|
"rewards/margins": 0.024142108857631683, |
|
"rewards/rejected": 0.0038943937979638577, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.47472184267031037, |
|
"grad_norm": 17.989654826552492, |
|
"learning_rate": 7.599999999999999e-08, |
|
"logits/chosen": -0.6781339645385742, |
|
"logits/rejected": -0.749303936958313, |
|
"logps/chosen": -158.1745147705078, |
|
"logps/rejected": -174.4684295654297, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.035830847918987274, |
|
"rewards/margins": 0.030665559694170952, |
|
"rewards/rejected": 0.005165286362171173, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.499707202810853, |
|
"grad_norm": 16.659252701258378, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -0.6752834916114807, |
|
"logits/rejected": -0.7593508958816528, |
|
"logps/chosen": -171.382568359375, |
|
"logps/rejected": -163.63429260253906, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.03702434524893761, |
|
"rewards/margins": 0.027536926791071892, |
|
"rewards/rejected": 0.009487415663897991, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5246925629513957, |
|
"grad_norm": 16.32043656758089, |
|
"learning_rate": 8.4e-08, |
|
"logits/chosen": -0.6655137538909912, |
|
"logits/rejected": -0.7505479454994202, |
|
"logps/chosen": -169.40570068359375, |
|
"logps/rejected": -204.10903930664062, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.03623414784669876, |
|
"rewards/margins": 0.026394760236144066, |
|
"rewards/rejected": 0.00983938854187727, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5496779230919383, |
|
"grad_norm": 15.622546817842467, |
|
"learning_rate": 8.8e-08, |
|
"logits/chosen": -0.6490427255630493, |
|
"logits/rejected": -0.7285715937614441, |
|
"logps/chosen": -156.1206817626953, |
|
"logps/rejected": -163.50775146484375, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.68359375, |
|
"rewards/chosen": 0.03998086601495743, |
|
"rewards/margins": 0.03349429368972778, |
|
"rewards/rejected": 0.006486575584858656, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.574663283232481, |
|
"grad_norm": 16.184465147770208, |
|
"learning_rate": 9.2e-08, |
|
"logits/chosen": -0.6619192361831665, |
|
"logits/rejected": -0.7221629023551941, |
|
"logps/chosen": -165.5501251220703, |
|
"logps/rejected": -181.84275817871094, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.72265625, |
|
"rewards/chosen": 0.054740943014621735, |
|
"rewards/margins": 0.04307195544242859, |
|
"rewards/rejected": 0.011668986640870571, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.5996486433730236, |
|
"grad_norm": 16.443566147600368, |
|
"learning_rate": 9.599999999999999e-08, |
|
"logits/chosen": -0.6748422384262085, |
|
"logits/rejected": -0.7496626973152161, |
|
"logps/chosen": -164.22450256347656, |
|
"logps/rejected": -171.41998291015625, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.75390625, |
|
"rewards/chosen": 0.07237192243337631, |
|
"rewards/margins": 0.06578801572322845, |
|
"rewards/rejected": 0.006583897862583399, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6246340035135662, |
|
"grad_norm": 15.12731079703187, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -0.6667000651359558, |
|
"logits/rejected": -0.7247492074966431, |
|
"logps/chosen": -152.322998046875, |
|
"logps/rejected": -198.47271728515625, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": 0.08942899107933044, |
|
"rewards/margins": 0.06775067746639252, |
|
"rewards/rejected": 0.021678313612937927, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6496193636541089, |
|
"grad_norm": 14.509532935330286, |
|
"learning_rate": 1.04e-07, |
|
"logits/chosen": -0.6652993559837341, |
|
"logits/rejected": -0.761294960975647, |
|
"logps/chosen": -161.75856018066406, |
|
"logps/rejected": -202.80789184570312, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.80859375, |
|
"rewards/chosen": 0.10889974981546402, |
|
"rewards/margins": 0.10048462450504303, |
|
"rewards/rejected": 0.008415117859840393, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.6746047237946515, |
|
"grad_norm": 13.828860573682139, |
|
"learning_rate": 1.08e-07, |
|
"logits/chosen": -0.6644891500473022, |
|
"logits/rejected": -0.7297866940498352, |
|
"logps/chosen": -167.40745544433594, |
|
"logps/rejected": -191.5254669189453, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.76953125, |
|
"rewards/chosen": 0.10965421050786972, |
|
"rewards/margins": 0.09407318383455276, |
|
"rewards/rejected": 0.015581016428768635, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.6995900839351942, |
|
"grad_norm": 14.094010354912754, |
|
"learning_rate": 1.1200000000000001e-07, |
|
"logits/chosen": -0.6829609274864197, |
|
"logits/rejected": -0.7518411874771118, |
|
"logps/chosen": -161.49169921875, |
|
"logps/rejected": -194.95101928710938, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.75390625, |
|
"rewards/chosen": 0.10309572517871857, |
|
"rewards/margins": 0.10116783529520035, |
|
"rewards/rejected": 0.0019278817344456911, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.7245754440757368, |
|
"grad_norm": 14.33157630919911, |
|
"learning_rate": 1.1599999999999999e-07, |
|
"logits/chosen": -0.678025484085083, |
|
"logits/rejected": -0.7501699924468994, |
|
"logps/chosen": -163.56793212890625, |
|
"logps/rejected": -198.6992950439453, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.77734375, |
|
"rewards/chosen": 0.12135004997253418, |
|
"rewards/margins": 0.11319853365421295, |
|
"rewards/rejected": 0.008151513524353504, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.7495608042162796, |
|
"grad_norm": 14.250114975850236, |
|
"learning_rate": 1.2e-07, |
|
"logits/chosen": -0.6545270681381226, |
|
"logits/rejected": -0.742324709892273, |
|
"logps/chosen": -177.452880859375, |
|
"logps/rejected": -257.645263671875, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.74609375, |
|
"rewards/chosen": 0.11762025952339172, |
|
"rewards/margins": 0.10628640651702881, |
|
"rewards/rejected": 0.011333855800330639, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7745461643568222, |
|
"grad_norm": 13.35663678176419, |
|
"learning_rate": 1.24e-07, |
|
"logits/chosen": -0.6429523229598999, |
|
"logits/rejected": -0.7005941867828369, |
|
"logps/chosen": -160.99609375, |
|
"logps/rejected": -158.8332061767578, |
|
"loss": 0.6357, |
|
"rewards/accuracies": 0.75390625, |
|
"rewards/chosen": 0.11651378124952316, |
|
"rewards/margins": 0.11703144758939743, |
|
"rewards/rejected": -0.0005176601116545498, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.7995315244973649, |
|
"grad_norm": 13.344011017214148, |
|
"learning_rate": 1.28e-07, |
|
"logits/chosen": -0.671217143535614, |
|
"logits/rejected": -0.7481105923652649, |
|
"logps/chosen": -164.79576110839844, |
|
"logps/rejected": -214.5727081298828, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.79296875, |
|
"rewards/chosen": 0.13732488453388214, |
|
"rewards/margins": 0.14830084145069122, |
|
"rewards/rejected": -0.010975953191518784, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.8245168846379075, |
|
"grad_norm": 11.603847422508784, |
|
"learning_rate": 1.32e-07, |
|
"logits/chosen": -0.6748225092887878, |
|
"logits/rejected": -0.7619199752807617, |
|
"logps/chosen": -165.9488525390625, |
|
"logps/rejected": -259.8753967285156, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.76171875, |
|
"rewards/chosen": 0.13191932439804077, |
|
"rewards/margins": 0.1852141171693802, |
|
"rewards/rejected": -0.05329480394721031, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.8495022447784502, |
|
"grad_norm": 11.310140143623643, |
|
"learning_rate": 1.36e-07, |
|
"logits/chosen": -0.6559648513793945, |
|
"logits/rejected": -0.7262955904006958, |
|
"logps/chosen": -162.48326110839844, |
|
"logps/rejected": -168.70834350585938, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.80078125, |
|
"rewards/chosen": 0.15893952548503876, |
|
"rewards/margins": 0.24133484065532684, |
|
"rewards/rejected": -0.08239532262086868, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.8744876049189928, |
|
"grad_norm": 11.076590712903254, |
|
"learning_rate": 1.3999999999999998e-07, |
|
"logits/chosen": -0.64164799451828, |
|
"logits/rejected": -0.6981642246246338, |
|
"logps/chosen": -159.47991943359375, |
|
"logps/rejected": -164.59423828125, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.11835081875324249, |
|
"rewards/margins": 0.2690027356147766, |
|
"rewards/rejected": -0.15065191686153412, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8994729650595354, |
|
"grad_norm": 11.30376463380917, |
|
"learning_rate": 1.44e-07, |
|
"logits/chosen": -0.6537081003189087, |
|
"logits/rejected": -0.7259843349456787, |
|
"logps/chosen": -163.25912475585938, |
|
"logps/rejected": -186.23190307617188, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.73828125, |
|
"rewards/chosen": 0.036043643951416016, |
|
"rewards/margins": 0.22015729546546936, |
|
"rewards/rejected": -0.18411365151405334, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.9244583252000781, |
|
"grad_norm": 11.470685518141812, |
|
"learning_rate": 1.48e-07, |
|
"logits/chosen": -0.6623800992965698, |
|
"logits/rejected": -0.7280963063240051, |
|
"logps/chosen": -163.83189392089844, |
|
"logps/rejected": -162.69908142089844, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": 0.0784626230597496, |
|
"rewards/margins": 0.33121681213378906, |
|
"rewards/rejected": -0.25275421142578125, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.9369510052703494, |
|
"eval_logits/chosen": -0.6098010540008545, |
|
"eval_logits/rejected": -0.6948941946029663, |
|
"eval_logps/chosen": -174.5200653076172, |
|
"eval_logps/rejected": -156.43321228027344, |
|
"eval_loss": 0.5377179384231567, |
|
"eval_rewards/accuracies": 0.8399999737739563, |
|
"eval_rewards/chosen": 0.11009039729833603, |
|
"eval_rewards/margins": 0.3738202750682831, |
|
"eval_rewards/rejected": -0.26372990012168884, |
|
"eval_runtime": 29.7619, |
|
"eval_samples_per_second": 3.36, |
|
"eval_steps_per_second": 0.84, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9494436853406207, |
|
"grad_norm": 10.843228504877107, |
|
"learning_rate": 1.5199999999999998e-07, |
|
"logits/chosen": -0.6558808088302612, |
|
"logits/rejected": -0.7365143299102783, |
|
"logps/chosen": -167.81484985351562, |
|
"logps/rejected": -233.72686767578125, |
|
"loss": 0.5714, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.06528769433498383, |
|
"rewards/margins": 0.31673550605773926, |
|
"rewards/rejected": -0.2514478266239166, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.9744290454811634, |
|
"grad_norm": 11.247238407003374, |
|
"learning_rate": 1.56e-07, |
|
"logits/chosen": -0.6631561517715454, |
|
"logits/rejected": -0.732117772102356, |
|
"logps/chosen": -158.63388061523438, |
|
"logps/rejected": -186.37835693359375, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.80078125, |
|
"rewards/chosen": 0.03322272002696991, |
|
"rewards/margins": 0.3244516849517822, |
|
"rewards/rejected": -0.2912289500236511, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.999414405621706, |
|
"grad_norm": 10.873879267366776, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -0.678787887096405, |
|
"logits/rejected": -0.762289822101593, |
|
"logps/chosen": -173.52723693847656, |
|
"logps/rejected": -212.18414306640625, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.04449426010251045, |
|
"rewards/margins": 0.29196038842201233, |
|
"rewards/rejected": -0.24746613204479218, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0243997657622488, |
|
"grad_norm": 10.813522808672303, |
|
"learning_rate": 1.6399999999999999e-07, |
|
"logits/chosen": -0.6670259833335876, |
|
"logits/rejected": -0.731939971446991, |
|
"logps/chosen": -169.05836486816406, |
|
"logps/rejected": -183.41171264648438, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": 0.09290473908185959, |
|
"rewards/margins": 0.3946535289287567, |
|
"rewards/rejected": -0.3017488121986389, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.0493851259027913, |
|
"grad_norm": 10.85312267043124, |
|
"learning_rate": 1.68e-07, |
|
"logits/chosen": -0.6822367310523987, |
|
"logits/rejected": -0.7420221567153931, |
|
"logps/chosen": -168.18081665039062, |
|
"logps/rejected": -175.647705078125, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.017166346311569214, |
|
"rewards/margins": 0.35215744376182556, |
|
"rewards/rejected": -0.33499109745025635, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.074370486043334, |
|
"grad_norm": 10.234969231047268, |
|
"learning_rate": 1.7199999999999998e-07, |
|
"logits/chosen": -0.6367188096046448, |
|
"logits/rejected": -0.7021892666816711, |
|
"logps/chosen": -168.204345703125, |
|
"logps/rejected": -172.60887145996094, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.78515625, |
|
"rewards/chosen": 0.031016340479254723, |
|
"rewards/margins": 0.4050399959087372, |
|
"rewards/rejected": -0.3740236461162567, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.0993558461838766, |
|
"grad_norm": 10.400074349909037, |
|
"learning_rate": 1.76e-07, |
|
"logits/chosen": -0.6521391272544861, |
|
"logits/rejected": -0.7159854769706726, |
|
"logps/chosen": -165.23777770996094, |
|
"logps/rejected": -179.17791748046875, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.80859375, |
|
"rewards/chosen": 0.010984277352690697, |
|
"rewards/margins": 0.4423283338546753, |
|
"rewards/rejected": -0.43134409189224243, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.1243412063244194, |
|
"grad_norm": 10.94221462203906, |
|
"learning_rate": 1.8e-07, |
|
"logits/chosen": -0.6484578847885132, |
|
"logits/rejected": -0.7167034149169922, |
|
"logps/chosen": -164.0042724609375, |
|
"logps/rejected": -189.57470703125, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.80859375, |
|
"rewards/chosen": 0.044118743389844894, |
|
"rewards/margins": 0.5543583035469055, |
|
"rewards/rejected": -0.5102395415306091, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.149326566464962, |
|
"grad_norm": 10.061790706384036, |
|
"learning_rate": 1.84e-07, |
|
"logits/chosen": -0.6450331211090088, |
|
"logits/rejected": -0.6989036798477173, |
|
"logps/chosen": -161.26258850097656, |
|
"logps/rejected": -176.26287841796875, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -0.09776711463928223, |
|
"rewards/margins": 0.5529555678367615, |
|
"rewards/rejected": -0.6507226824760437, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.1743119266055047, |
|
"grad_norm": 10.181525940489752, |
|
"learning_rate": 1.88e-07, |
|
"logits/chosen": -0.6763854026794434, |
|
"logits/rejected": -0.7379953861236572, |
|
"logps/chosen": -162.50949096679688, |
|
"logps/rejected": -195.75962829589844, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.84765625, |
|
"rewards/chosen": -0.21141284704208374, |
|
"rewards/margins": 0.6165250539779663, |
|
"rewards/rejected": -0.8279378414154053, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.1992972867460472, |
|
"grad_norm": 9.59098495871882, |
|
"learning_rate": 1.9199999999999997e-07, |
|
"logits/chosen": -0.6583154201507568, |
|
"logits/rejected": -0.7248339653015137, |
|
"logps/chosen": -171.67164611816406, |
|
"logps/rejected": -204.2442626953125, |
|
"loss": 0.4525, |
|
"rewards/accuracies": 0.83203125, |
|
"rewards/chosen": -0.22590558230876923, |
|
"rewards/margins": 0.7413816452026367, |
|
"rewards/rejected": -0.9672871828079224, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.22428264688659, |
|
"grad_norm": 9.611151986852143, |
|
"learning_rate": 1.9599999999999998e-07, |
|
"logits/chosen": -0.6739534139633179, |
|
"logits/rejected": -0.7209540605545044, |
|
"logps/chosen": -165.88185119628906, |
|
"logps/rejected": -198.14913940429688, |
|
"loss": 0.4362, |
|
"rewards/accuracies": 0.83984375, |
|
"rewards/chosen": -0.4009418785572052, |
|
"rewards/margins": 0.8526190519332886, |
|
"rewards/rejected": -1.2535607814788818, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.2492680070271325, |
|
"grad_norm": 9.635547492152954, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -0.6529893279075623, |
|
"logits/rejected": -0.7117218971252441, |
|
"logps/chosen": -167.87130737304688, |
|
"logps/rejected": -199.54925537109375, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5539758205413818, |
|
"rewards/margins": 0.8746498823165894, |
|
"rewards/rejected": -1.4286257028579712, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.2742533671676752, |
|
"grad_norm": 9.323014566726187, |
|
"learning_rate": 1.9945218953682733e-07, |
|
"logits/chosen": -0.6634210348129272, |
|
"logits/rejected": -0.7515499591827393, |
|
"logps/chosen": -179.8871307373047, |
|
"logps/rejected": -211.56712341308594, |
|
"loss": 0.4162, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -0.6504544615745544, |
|
"rewards/margins": 0.9477463364601135, |
|
"rewards/rejected": -1.598200798034668, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.2992387273082178, |
|
"grad_norm": 9.782518483212584, |
|
"learning_rate": 1.9781476007338056e-07, |
|
"logits/chosen": -0.6890003681182861, |
|
"logits/rejected": -0.7631358504295349, |
|
"logps/chosen": -178.91226196289062, |
|
"logps/rejected": -221.65196228027344, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.8466004729270935, |
|
"rewards/margins": 0.9782698154449463, |
|
"rewards/rejected": -1.8248703479766846, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.3242240874487605, |
|
"grad_norm": 9.676632000972155, |
|
"learning_rate": 1.9510565162951537e-07, |
|
"logits/chosen": -0.6939510703086853, |
|
"logits/rejected": -0.7790961861610413, |
|
"logps/chosen": -175.7962188720703, |
|
"logps/rejected": -225.33914184570312, |
|
"loss": 0.3898, |
|
"rewards/accuracies": 0.85546875, |
|
"rewards/chosen": -0.8194781541824341, |
|
"rewards/margins": 1.1897538900375366, |
|
"rewards/rejected": -2.0092320442199707, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.349209447589303, |
|
"grad_norm": 9.450466029531317, |
|
"learning_rate": 1.9135454576426007e-07, |
|
"logits/chosen": -0.6339809894561768, |
|
"logits/rejected": -0.7058581113815308, |
|
"logps/chosen": -176.75390625, |
|
"logps/rejected": -183.5647430419922, |
|
"loss": 0.3804, |
|
"rewards/accuracies": 0.83984375, |
|
"rewards/chosen": -0.9991697072982788, |
|
"rewards/margins": 1.0066075325012207, |
|
"rewards/rejected": -2.00577712059021, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.3741948077298458, |
|
"grad_norm": 9.045883620582659, |
|
"learning_rate": 1.8660254037844388e-07, |
|
"logits/chosen": -0.6670467853546143, |
|
"logits/rejected": -0.7262380123138428, |
|
"logps/chosen": -170.4173126220703, |
|
"logps/rejected": -232.4281005859375, |
|
"loss": 0.3591, |
|
"rewards/accuracies": 0.89453125, |
|
"rewards/chosen": -0.9117798805236816, |
|
"rewards/margins": 1.2910921573638916, |
|
"rewards/rejected": -2.2028720378875732, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.3991801678703886, |
|
"grad_norm": 8.752793412837908, |
|
"learning_rate": 1.8090169943749475e-07, |
|
"logits/chosen": -0.66654372215271, |
|
"logits/rejected": -0.738584041595459, |
|
"logps/chosen": -168.66757202148438, |
|
"logps/rejected": -189.63882446289062, |
|
"loss": 0.3534, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -0.9708907604217529, |
|
"rewards/margins": 1.3288507461547852, |
|
"rewards/rejected": -2.299741506576538, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.424165528010931, |
|
"grad_norm": 8.754108546199273, |
|
"learning_rate": 1.7431448254773942e-07, |
|
"logits/chosen": -0.6560633778572083, |
|
"logits/rejected": -0.7114984393119812, |
|
"logps/chosen": -163.83883666992188, |
|
"logps/rejected": -181.41429138183594, |
|
"loss": 0.3265, |
|
"rewards/accuracies": 0.86328125, |
|
"rewards/chosen": -1.0229204893112183, |
|
"rewards/margins": 1.4013088941574097, |
|
"rewards/rejected": -2.424229383468628, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.4491508881514736, |
|
"grad_norm": 8.842470960325368, |
|
"learning_rate": 1.669130606358858e-07, |
|
"logits/chosen": -0.6747975945472717, |
|
"logits/rejected": -0.7556227445602417, |
|
"logps/chosen": -177.47671508789062, |
|
"logps/rejected": -229.03138732910156, |
|
"loss": 0.3496, |
|
"rewards/accuracies": 0.84765625, |
|
"rewards/chosen": -1.2031465768814087, |
|
"rewards/margins": 1.2502222061157227, |
|
"rewards/rejected": -2.453368663787842, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.4741362482920164, |
|
"grad_norm": 8.560944140870841, |
|
"learning_rate": 1.5877852522924732e-07, |
|
"logits/chosen": -0.6661523580551147, |
|
"logits/rejected": -0.7374821305274963, |
|
"logps/chosen": -179.45278930664062, |
|
"logps/rejected": -200.83184814453125, |
|
"loss": 0.3463, |
|
"rewards/accuracies": 0.83984375, |
|
"rewards/chosen": -1.274424433708191, |
|
"rewards/margins": 1.3644013404846191, |
|
"rewards/rejected": -2.6388256549835205, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.4991216084325591, |
|
"grad_norm": 8.135071032264696, |
|
"learning_rate": 1.5e-07, |
|
"logits/chosen": -0.6814154982566833, |
|
"logits/rejected": -0.7541234493255615, |
|
"logps/chosen": -176.5241241455078, |
|
"logps/rejected": -178.12158203125, |
|
"loss": 0.3172, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -1.236649513244629, |
|
"rewards/margins": 1.4433623552322388, |
|
"rewards/rejected": -2.680011749267578, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.5241069685731017, |
|
"grad_norm": 8.611381059926462, |
|
"learning_rate": 1.4067366430758004e-07, |
|
"logits/chosen": -0.7052810192108154, |
|
"logits/rejected": -0.7841841578483582, |
|
"logps/chosen": -176.52615356445312, |
|
"logps/rejected": -265.2866516113281, |
|
"loss": 0.3464, |
|
"rewards/accuracies": 0.88671875, |
|
"rewards/chosen": -1.2547199726104736, |
|
"rewards/margins": 1.5170029401779175, |
|
"rewards/rejected": -2.7717230319976807, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.5490923287136442, |
|
"grad_norm": 8.252873140772632, |
|
"learning_rate": 1.3090169943749475e-07, |
|
"logits/chosen": -0.6520602107048035, |
|
"logits/rejected": -0.7327940464019775, |
|
"logps/chosen": -176.3183135986328, |
|
"logps/rejected": -229.5253448486328, |
|
"loss": 0.3087, |
|
"rewards/accuracies": 0.88671875, |
|
"rewards/chosen": -1.2032685279846191, |
|
"rewards/margins": 1.7557697296142578, |
|
"rewards/rejected": -2.959038257598877, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.574077688854187, |
|
"grad_norm": 10.055814181219855, |
|
"learning_rate": 1.207911690817759e-07, |
|
"logits/chosen": -0.6678023338317871, |
|
"logits/rejected": -0.7291412949562073, |
|
"logps/chosen": -170.61351013183594, |
|
"logps/rejected": -191.97714233398438, |
|
"loss": 0.3451, |
|
"rewards/accuracies": 0.88671875, |
|
"rewards/chosen": -1.2367451190948486, |
|
"rewards/margins": 1.7602653503417969, |
|
"rewards/rejected": -2.9970104694366455, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.5990630489947297, |
|
"grad_norm": 7.755494267888693, |
|
"learning_rate": 1.1045284632676535e-07, |
|
"logits/chosen": -0.6732159852981567, |
|
"logits/rejected": -0.7458621263504028, |
|
"logps/chosen": -179.55528259277344, |
|
"logps/rejected": -194.04571533203125, |
|
"loss": 0.3013, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": -1.2336257696151733, |
|
"rewards/margins": 1.7932240962982178, |
|
"rewards/rejected": -3.0268499851226807, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.6240484091352723, |
|
"grad_norm": 8.333802703761862, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -0.6721549034118652, |
|
"logits/rejected": -0.7458239793777466, |
|
"logps/chosen": -182.87872314453125, |
|
"logps/rejected": -235.57015991210938, |
|
"loss": 0.3049, |
|
"rewards/accuracies": 0.89453125, |
|
"rewards/chosen": -1.2039211988449097, |
|
"rewards/margins": 1.8136268854141235, |
|
"rewards/rejected": -3.017548084259033, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.6490337692758148, |
|
"grad_norm": 8.273848396450413, |
|
"learning_rate": 8.954715367323466e-08, |
|
"logits/chosen": -0.693534255027771, |
|
"logits/rejected": -0.7786884307861328, |
|
"logps/chosen": -180.05812072753906, |
|
"logps/rejected": -233.4104461669922, |
|
"loss": 0.2977, |
|
"rewards/accuracies": 0.92578125, |
|
"rewards/chosen": -1.2325382232666016, |
|
"rewards/margins": 1.9344900846481323, |
|
"rewards/rejected": -3.1670281887054443, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.6740191294163576, |
|
"grad_norm": 7.810872252646391, |
|
"learning_rate": 7.920883091822408e-08, |
|
"logits/chosen": -0.6568552255630493, |
|
"logits/rejected": -0.7265664935112, |
|
"logps/chosen": -176.93911743164062, |
|
"logps/rejected": -214.8884735107422, |
|
"loss": 0.2827, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -1.2643663883209229, |
|
"rewards/margins": 1.8012300729751587, |
|
"rewards/rejected": -3.065596580505371, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.6990044895569003, |
|
"grad_norm": 7.802511130852569, |
|
"learning_rate": 6.909830056250527e-08, |
|
"logits/chosen": -0.691701352596283, |
|
"logits/rejected": -0.7584172487258911, |
|
"logps/chosen": -180.76043701171875, |
|
"logps/rejected": -201.7536163330078, |
|
"loss": 0.2923, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -1.3751585483551025, |
|
"rewards/margins": 1.8911828994750977, |
|
"rewards/rejected": -3.2663414478302, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.723989849697443, |
|
"grad_norm": 7.661437671496506, |
|
"learning_rate": 5.9326335692419996e-08, |
|
"logits/chosen": -0.691138744354248, |
|
"logits/rejected": -0.7739748954772949, |
|
"logps/chosen": -179.13803100585938, |
|
"logps/rejected": -230.07017517089844, |
|
"loss": 0.2703, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": -1.3471490144729614, |
|
"rewards/margins": 2.1134843826293945, |
|
"rewards/rejected": -3.4606332778930664, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.7489752098379856, |
|
"grad_norm": 7.425320729921027, |
|
"learning_rate": 5.000000000000002e-08, |
|
"logits/chosen": -0.6808772087097168, |
|
"logits/rejected": -0.7619104385375977, |
|
"logps/chosen": -178.50820922851562, |
|
"logps/rejected": -221.21847534179688, |
|
"loss": 0.2903, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -1.328997015953064, |
|
"rewards/margins": 1.9231306314468384, |
|
"rewards/rejected": -3.2521276473999023, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.7739605699785281, |
|
"grad_norm": 7.517073157187585, |
|
"learning_rate": 4.1221474770752695e-08, |
|
"logits/chosen": -0.6579867005348206, |
|
"logits/rejected": -0.7242329716682434, |
|
"logps/chosen": -174.0385284423828, |
|
"logps/rejected": -226.00914001464844, |
|
"loss": 0.2703, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": -1.3584879636764526, |
|
"rewards/margins": 1.9965893030166626, |
|
"rewards/rejected": -3.3550772666931152, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.798945930119071, |
|
"grad_norm": 7.848642238408611, |
|
"learning_rate": 3.3086939364114206e-08, |
|
"logits/chosen": -0.6827540397644043, |
|
"logits/rejected": -0.7463814616203308, |
|
"logps/chosen": -181.77685546875, |
|
"logps/rejected": -202.3132781982422, |
|
"loss": 0.2875, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": -1.3786863088607788, |
|
"rewards/margins": 1.8975354433059692, |
|
"rewards/rejected": -3.276221990585327, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.8239312902596136, |
|
"grad_norm": 7.224249615030409, |
|
"learning_rate": 2.5685517452260564e-08, |
|
"logits/chosen": -0.6343103647232056, |
|
"logits/rejected": -0.7141076326370239, |
|
"logps/chosen": -190.22946166992188, |
|
"logps/rejected": -238.4442901611328, |
|
"loss": 0.2742, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -1.3857475519180298, |
|
"rewards/margins": 2.068246841430664, |
|
"rewards/rejected": -3.4539945125579834, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.8489166504001562, |
|
"grad_norm": 7.7461193794138135, |
|
"learning_rate": 1.9098300562505266e-08, |
|
"logits/chosen": -0.6793495416641235, |
|
"logits/rejected": -0.7641343474388123, |
|
"logps/chosen": -187.63839721679688, |
|
"logps/rejected": -191.110107421875, |
|
"loss": 0.2835, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.504347801208496, |
|
"rewards/margins": 1.7995954751968384, |
|
"rewards/rejected": -3.303943395614624, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.8739020105406987, |
|
"grad_norm": 7.689771419387666, |
|
"learning_rate": 1.3397459621556128e-08, |
|
"logits/chosen": -0.6912616491317749, |
|
"logits/rejected": -0.7680445313453674, |
|
"logps/chosen": -182.9419708251953, |
|
"logps/rejected": -200.2938690185547, |
|
"loss": 0.2655, |
|
"rewards/accuracies": 0.9296875, |
|
"rewards/chosen": -1.4619263410568237, |
|
"rewards/margins": 2.098031759262085, |
|
"rewards/rejected": -3.559957981109619, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8739020105406987, |
|
"eval_logits/chosen": -0.6168845891952515, |
|
"eval_logits/rejected": -0.7024461627006531, |
|
"eval_logps/chosen": -189.5503692626953, |
|
"eval_logps/rejected": -188.0495147705078, |
|
"eval_loss": 0.26570188999176025, |
|
"eval_rewards/accuracies": 0.8399999737739563, |
|
"eval_rewards/chosen": -1.3929405212402344, |
|
"eval_rewards/margins": 2.0324180126190186, |
|
"eval_rewards/rejected": -3.425358533859253, |
|
"eval_runtime": 29.4565, |
|
"eval_samples_per_second": 3.395, |
|
"eval_steps_per_second": 0.849, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8988873706812415, |
|
"grad_norm": 8.195993031555705, |
|
"learning_rate": 8.645454235739902e-09, |
|
"logits/chosen": -0.676539957523346, |
|
"logits/rejected": -0.7394694685935974, |
|
"logps/chosen": -181.1811065673828, |
|
"logps/rejected": -207.39080810546875, |
|
"loss": 0.2738, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -1.422343373298645, |
|
"rewards/margins": 1.9836503267288208, |
|
"rewards/rejected": -3.405993700027466, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.9238727308217842, |
|
"grad_norm": 7.242463445087286, |
|
"learning_rate": 4.8943483704846465e-09, |
|
"logits/chosen": -0.656994104385376, |
|
"logits/rejected": -0.7103748321533203, |
|
"logps/chosen": -183.3748321533203, |
|
"logps/rejected": -188.81219482421875, |
|
"loss": 0.2718, |
|
"rewards/accuracies": 0.89453125, |
|
"rewards/chosen": -1.4463797807693481, |
|
"rewards/margins": 1.880941390991211, |
|
"rewards/rejected": -3.3273210525512695, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.9488580909623268, |
|
"grad_norm": 6.9367447852277575, |
|
"learning_rate": 2.1852399266194312e-09, |
|
"logits/chosen": -0.6909129619598389, |
|
"logits/rejected": -0.7760818004608154, |
|
"logps/chosen": -177.80374145507812, |
|
"logps/rejected": -266.32806396484375, |
|
"loss": 0.2686, |
|
"rewards/accuracies": 0.92578125, |
|
"rewards/chosen": -1.4405275583267212, |
|
"rewards/margins": 2.0439579486846924, |
|
"rewards/rejected": -3.484485626220703, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.9738434511028693, |
|
"grad_norm": 7.621192478848495, |
|
"learning_rate": 5.47810463172671e-10, |
|
"logits/chosen": -0.6629250049591064, |
|
"logits/rejected": -0.7373142242431641, |
|
"logps/chosen": -183.51670837402344, |
|
"logps/rejected": -190.675537109375, |
|
"loss": 0.2845, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -1.451604962348938, |
|
"rewards/margins": 2.0776655673980713, |
|
"rewards/rejected": -3.5292704105377197, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.998828811243412, |
|
"grad_norm": 7.292176844709846, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.745610773563385, |
|
"logits/rejected": -0.8196827173233032, |
|
"logps/chosen": -172.93553161621094, |
|
"logps/rejected": -271.3219299316406, |
|
"loss": 0.264, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": -1.2879290580749512, |
|
"rewards/margins": 2.1956043243408203, |
|
"rewards/rejected": -3.4835333824157715, |
|
"step": 160 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 160, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 150, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|