|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9987908101571947, |
|
"eval_steps": 10000000, |
|
"global_step": 413, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1308.2122296641476, |
|
"learning_rate": 2.3809523809523806e-09, |
|
"logits/chosen": -2.7005977630615234, |
|
"logits/rejected": -2.6288318634033203, |
|
"logps/chosen": -1.1158788204193115, |
|
"logps/rejected": -1.1333446502685547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1049.0503356036236, |
|
"learning_rate": 2.3809523809523807e-08, |
|
"logits/chosen": -2.762399435043335, |
|
"logits/rejected": -2.6968984603881836, |
|
"logps/chosen": -0.8372963070869446, |
|
"logps/rejected": -0.8176102638244629, |
|
"loss": 0.7125, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": 0.03224152699112892, |
|
"rewards/margins": 0.04410284012556076, |
|
"rewards/rejected": -0.011861314065754414, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1236.016019303981, |
|
"learning_rate": 4.7619047619047613e-08, |
|
"logits/chosen": -2.689128875732422, |
|
"logits/rejected": -2.64937686920166, |
|
"logps/chosen": -0.9927361607551575, |
|
"logps/rejected": -1.03745436668396, |
|
"loss": 0.7127, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09930654615163803, |
|
"rewards/margins": 0.010919012129306793, |
|
"rewards/rejected": -0.11022555828094482, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1468.1565781012905, |
|
"learning_rate": 7.142857142857142e-08, |
|
"logits/chosen": -2.7302658557891846, |
|
"logits/rejected": -2.6768224239349365, |
|
"logps/chosen": -0.967939555644989, |
|
"logps/rejected": -0.9760215878486633, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1426212042570114, |
|
"rewards/margins": 0.09910523146390915, |
|
"rewards/rejected": -0.24172644317150116, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1718.8019373304546, |
|
"learning_rate": 9.523809523809523e-08, |
|
"logits/chosen": -2.6734580993652344, |
|
"logits/rejected": -2.6297881603240967, |
|
"logps/chosen": -0.9952117204666138, |
|
"logps/rejected": -0.920923113822937, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6706060171127319, |
|
"rewards/margins": 0.4042028486728668, |
|
"rewards/rejected": -1.0748088359832764, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 886.1855666160817, |
|
"learning_rate": 9.988531521210217e-08, |
|
"logits/chosen": -2.739663600921631, |
|
"logits/rejected": -2.6859798431396484, |
|
"logps/chosen": -0.9715211987495422, |
|
"logps/rejected": -0.9160677790641785, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4754611551761627, |
|
"rewards/margins": 0.44252967834472656, |
|
"rewards/rejected": -0.9179908633232117, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1659.7392402989283, |
|
"learning_rate": 9.94203097871474e-08, |
|
"logits/chosen": -2.743115186691284, |
|
"logits/rejected": -2.676964521408081, |
|
"logps/chosen": -0.9205001592636108, |
|
"logps/rejected": -0.9147119522094727, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4151291251182556, |
|
"rewards/margins": 0.4165617823600769, |
|
"rewards/rejected": -0.8316909074783325, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1314.7754621298307, |
|
"learning_rate": 9.860114570402053e-08, |
|
"logits/chosen": -2.744157314300537, |
|
"logits/rejected": -2.7204043865203857, |
|
"logps/chosen": -0.9306680560112, |
|
"logps/rejected": -0.8743250966072083, |
|
"loss": 0.5734, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.3868916630744934, |
|
"rewards/margins": 0.6596783399581909, |
|
"rewards/rejected": -1.046570062637329, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1220.0578782834486, |
|
"learning_rate": 9.743369330335185e-08, |
|
"logits/chosen": -2.6871607303619385, |
|
"logits/rejected": -2.6331558227539062, |
|
"logps/chosen": -0.901233971118927, |
|
"logps/rejected": -0.9974308013916016, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.6774497032165527, |
|
"rewards/margins": 0.5885565876960754, |
|
"rewards/rejected": -1.266006350517273, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 906.8663763416491, |
|
"learning_rate": 9.592631884948653e-08, |
|
"logits/chosen": -2.6903910636901855, |
|
"logits/rejected": -2.6411349773406982, |
|
"logps/chosen": -1.0175859928131104, |
|
"logps/rejected": -1.0327794551849365, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.40117961168289185, |
|
"rewards/margins": 0.8878555297851562, |
|
"rewards/rejected": -1.2890350818634033, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 866.6073604256766, |
|
"learning_rate": 9.408982457568138e-08, |
|
"logits/chosen": -2.7176403999328613, |
|
"logits/rejected": -2.671095371246338, |
|
"logps/chosen": -0.8981878161430359, |
|
"logps/rejected": -0.9157611131668091, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.6816717386245728, |
|
"rewards/margins": 0.9627196192741394, |
|
"rewards/rejected": -1.6443912982940674, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1160.110812326109, |
|
"learning_rate": 9.193737127252132e-08, |
|
"logits/chosen": -2.7276268005371094, |
|
"logits/rejected": -2.6847987174987793, |
|
"logps/chosen": -0.9827289581298828, |
|
"logps/rejected": -0.94146728515625, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4507879614830017, |
|
"rewards/margins": 0.8122564554214478, |
|
"rewards/rejected": -1.2630443572998047, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1118.8296262026665, |
|
"learning_rate": 8.94843839743072e-08, |
|
"logits/chosen": -2.684906244277954, |
|
"logits/rejected": -2.6362223625183105, |
|
"logps/chosen": -0.8857740163803101, |
|
"logps/rejected": -0.9783684611320496, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.4952009320259094, |
|
"rewards/margins": 1.4160592555999756, |
|
"rewards/rejected": -1.9112603664398193, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 816.4913832702339, |
|
"learning_rate": 8.674844141929039e-08, |
|
"logits/chosen": -2.7162153720855713, |
|
"logits/rejected": -2.680973529815674, |
|
"logps/chosen": -0.8731144666671753, |
|
"logps/rejected": -0.8701594471931458, |
|
"loss": 0.5054, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.34728819131851196, |
|
"rewards/margins": 1.2909767627716064, |
|
"rewards/rejected": -1.6382650136947632, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1469.7710854903712, |
|
"learning_rate": 8.374915007591053e-08, |
|
"logits/chosen": -2.691988945007324, |
|
"logits/rejected": -2.652631998062134, |
|
"logps/chosen": -0.9311792254447937, |
|
"logps/rejected": -0.9303830862045288, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5356858968734741, |
|
"rewards/margins": 1.0560630559921265, |
|
"rewards/rejected": -1.5917489528656006, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1688.4287131310427, |
|
"learning_rate": 8.05080036377971e-08, |
|
"logits/chosen": -2.7248551845550537, |
|
"logits/rejected": -2.6649651527404785, |
|
"logps/chosen": -0.9584500193595886, |
|
"logps/rejected": -0.9874745607376099, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.24187500774860382, |
|
"rewards/margins": 1.0612785816192627, |
|
"rewards/rejected": -1.303153395652771, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 883.2476388726114, |
|
"learning_rate": 7.704822899442949e-08, |
|
"logits/chosen": -2.717927932739258, |
|
"logits/rejected": -2.6812281608581543, |
|
"logps/chosen": -0.9134844541549683, |
|
"logps/rejected": -0.9353663325309753, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3312217593193054, |
|
"rewards/margins": 1.2677682638168335, |
|
"rewards/rejected": -1.5989899635314941, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1741.9681865719251, |
|
"learning_rate": 7.339461978126947e-08, |
|
"logits/chosen": -2.696554660797119, |
|
"logits/rejected": -2.628129482269287, |
|
"logps/chosen": -0.896826446056366, |
|
"logps/rejected": -0.8772991895675659, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.48312321305274963, |
|
"rewards/margins": 1.3627839088439941, |
|
"rewards/rejected": -1.845907211303711, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1946.5851641629447, |
|
"learning_rate": 6.957335870218904e-08, |
|
"logits/chosen": -2.673733949661255, |
|
"logits/rejected": -2.6041712760925293, |
|
"logps/chosen": -0.939963698387146, |
|
"logps/rejected": -0.956584095954895, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.2670658528804779, |
|
"rewards/margins": 1.5085264444351196, |
|
"rewards/rejected": -1.2414608001708984, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2276.5142714905005, |
|
"learning_rate": 6.56118298974763e-08, |
|
"logits/chosen": -2.732027292251587, |
|
"logits/rejected": -2.7048563957214355, |
|
"logps/chosen": -0.9618963003158569, |
|
"logps/rejected": -1.0246574878692627, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.570631742477417, |
|
"rewards/margins": 1.4420315027236938, |
|
"rewards/rejected": -2.012662887573242, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1178.2661546943384, |
|
"learning_rate": 6.153842270203887e-08, |
|
"logits/chosen": -2.6935534477233887, |
|
"logits/rejected": -2.6572394371032715, |
|
"logps/chosen": -0.8579891920089722, |
|
"logps/rejected": -0.931664764881134, |
|
"loss": 0.5316, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.0793720930814743, |
|
"rewards/margins": 1.138346552848816, |
|
"rewards/rejected": -1.217718482017517, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 913.5130412259963, |
|
"learning_rate": 5.738232820012406e-08, |
|
"logits/chosen": -2.7250068187713623, |
|
"logits/rejected": -2.6632461547851562, |
|
"logps/chosen": -1.0246375799179077, |
|
"logps/rejected": -0.9633069038391113, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.25627315044403076, |
|
"rewards/margins": 1.4161301851272583, |
|
"rewards/rejected": -1.672403335571289, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1025.54215312441, |
|
"learning_rate": 5.317333003449687e-08, |
|
"logits/chosen": -2.733330011367798, |
|
"logits/rejected": -2.715299606323242, |
|
"logps/chosen": -0.8904998898506165, |
|
"logps/rejected": -0.8720332980155945, |
|
"loss": 0.5228, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.2184334546327591, |
|
"rewards/margins": 1.4306997060775757, |
|
"rewards/rejected": -1.212266206741333, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 932.0109937677821, |
|
"learning_rate": 4.894159096919109e-08, |
|
"logits/chosen": -2.6878042221069336, |
|
"logits/rejected": -2.65002179145813, |
|
"logps/chosen": -0.9498124122619629, |
|
"logps/rejected": -0.9380944967269897, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5880553722381592, |
|
"rewards/margins": 1.365509271621704, |
|
"rewards/rejected": -1.9535646438598633, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 936.3493959909762, |
|
"learning_rate": 4.471743673537994e-08, |
|
"logits/chosen": -2.7069993019104004, |
|
"logits/rejected": -2.6865086555480957, |
|
"logps/chosen": -0.9886058568954468, |
|
"logps/rejected": -0.9898191690444946, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.04917572811245918, |
|
"rewards/margins": 1.6429868936538696, |
|
"rewards/rejected": -1.5938111543655396, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 984.7410594483609, |
|
"learning_rate": 4.053113870938224e-08, |
|
"logits/chosen": -2.758028507232666, |
|
"logits/rejected": -2.679352283477783, |
|
"logps/chosen": -0.8968666791915894, |
|
"logps/rejected": -0.9234074354171753, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.0006614074227400124, |
|
"rewards/margins": 1.3911110162734985, |
|
"rewards/rejected": -1.3904496431350708, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2539.435048877582, |
|
"learning_rate": 3.641269698018933e-08, |
|
"logits/chosen": -2.713007926940918, |
|
"logits/rejected": -2.6481966972351074, |
|
"logps/chosen": -1.007727861404419, |
|
"logps/rejected": -0.9894694089889526, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.06799235194921494, |
|
"rewards/margins": 1.8265388011932373, |
|
"rewards/rejected": -1.7585465908050537, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1438.0528773750598, |
|
"learning_rate": 3.2391625361107026e-08, |
|
"logits/chosen": -2.7407853603363037, |
|
"logits/rejected": -2.6641056537628174, |
|
"logps/chosen": -0.9492539167404175, |
|
"logps/rejected": -0.9392199516296387, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.02730640210211277, |
|
"rewards/margins": 1.7186416387557983, |
|
"rewards/rejected": -1.745948076248169, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1220.7276779461947, |
|
"learning_rate": 2.8496739886173992e-08, |
|
"logits/chosen": -2.697509288787842, |
|
"logits/rejected": -2.640756607055664, |
|
"logps/chosen": -0.9139662981033325, |
|
"logps/rejected": -0.9015041589736938, |
|
"loss": 0.4723, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.023266727104783058, |
|
"rewards/margins": 1.4177985191345215, |
|
"rewards/rejected": -1.3945319652557373, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1542.4514800663226, |
|
"learning_rate": 2.4755952307046063e-08, |
|
"logits/chosen": -2.742187023162842, |
|
"logits/rejected": -2.699744701385498, |
|
"logps/chosen": -1.0006037950515747, |
|
"logps/rejected": -0.9843395352363586, |
|
"loss": 0.448, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.14072290062904358, |
|
"rewards/margins": 1.5138235092163086, |
|
"rewards/rejected": -1.3731005191802979, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1452.2302527315665, |
|
"learning_rate": 2.1196070070200995e-08, |
|
"logits/chosen": -2.7377541065216064, |
|
"logits/rejected": -2.6858716011047363, |
|
"logps/chosen": -0.900943398475647, |
|
"logps/rejected": -0.9453694224357605, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.028856370598077774, |
|
"rewards/margins": 1.6425704956054688, |
|
"rewards/rejected": -1.671426773071289, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 966.038328895732, |
|
"learning_rate": 1.7842604207878005e-08, |
|
"logits/chosen": -2.7250287532806396, |
|
"logits/rejected": -2.6620233058929443, |
|
"logps/chosen": -1.0749655961990356, |
|
"logps/rejected": -1.015625238418579, |
|
"loss": 0.4403, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.16425299644470215, |
|
"rewards/margins": 1.6934306621551514, |
|
"rewards/rejected": -1.8576834201812744, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1686.580221181831, |
|
"learning_rate": 1.4719586519455534e-08, |
|
"logits/chosen": -2.728663444519043, |
|
"logits/rejected": -2.64876389503479, |
|
"logps/chosen": -0.9033769369125366, |
|
"logps/rejected": -0.9375128746032715, |
|
"loss": 0.4301, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.11283926665782928, |
|
"rewards/margins": 2.1201956272125244, |
|
"rewards/rejected": -2.0073564052581787, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1227.1950699118374, |
|
"learning_rate": 1.18493973533924e-08, |
|
"logits/chosen": -2.679917097091675, |
|
"logits/rejected": -2.611525297164917, |
|
"logps/chosen": -0.9286376237869263, |
|
"logps/rejected": -0.9542851448059082, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18441525101661682, |
|
"rewards/margins": 1.7495098114013672, |
|
"rewards/rejected": -1.9339250326156616, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1294.7256357947958, |
|
"learning_rate": 9.252605223891208e-09, |
|
"logits/chosen": -2.759120464324951, |
|
"logits/rejected": -2.6892759799957275, |
|
"logps/chosen": -0.8945956230163574, |
|
"logps/rejected": -0.9347489476203918, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.42182081937789917, |
|
"rewards/margins": 1.480148196220398, |
|
"rewards/rejected": -1.9019691944122314, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 914.832611398372, |
|
"learning_rate": 6.947819411632222e-09, |
|
"logits/chosen": -2.6895413398742676, |
|
"logits/rejected": -2.635143995285034, |
|
"logps/chosen": -0.9759384393692017, |
|
"logps/rejected": -0.96312016248703, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.2653306722640991, |
|
"rewards/margins": 1.628769874572754, |
|
"rewards/rejected": -1.894100546836853, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1785.1282881305524, |
|
"learning_rate": 4.951556604879048e-09, |
|
"logits/chosen": -2.7073302268981934, |
|
"logits/rejected": -2.661701202392578, |
|
"logps/chosen": -0.9704357385635376, |
|
"logps/rejected": -1.0066497325897217, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.30233365297317505, |
|
"rewards/margins": 1.4092557430267334, |
|
"rewards/rejected": -1.7115894556045532, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 821.6518799466353, |
|
"learning_rate": 3.278122536639888e-09, |
|
"logits/chosen": -2.7481675148010254, |
|
"logits/rejected": -2.692375898361206, |
|
"logps/chosen": -0.8963810205459595, |
|
"logps/rejected": -0.8914927244186401, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.013069706968963146, |
|
"rewards/margins": 1.6356351375579834, |
|
"rewards/rejected": -1.622565507888794, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 820.7523726208289, |
|
"learning_rate": 1.9395094661033118e-09, |
|
"logits/chosen": -2.717128276824951, |
|
"logits/rejected": -2.693026542663574, |
|
"logps/chosen": -0.9069439172744751, |
|
"logps/rejected": -0.9684022068977356, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.17060108482837677, |
|
"rewards/margins": 1.5492388010025024, |
|
"rewards/rejected": -1.7198398113250732, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 791.5724729924565, |
|
"learning_rate": 9.453102390227175e-10, |
|
"logits/chosen": -2.694126605987549, |
|
"logits/rejected": -2.655355453491211, |
|
"logps/chosen": -0.9297587275505066, |
|
"logps/rejected": -0.9920668601989746, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.0569925419986248, |
|
"rewards/margins": 1.6660388708114624, |
|
"rewards/rejected": -1.7230314016342163, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1054.605495809102, |
|
"learning_rate": 3.0264954291494007e-10, |
|
"logits/chosen": -2.7315666675567627, |
|
"logits/rejected": -2.6392362117767334, |
|
"logps/chosen": -0.970044732093811, |
|
"logps/rejected": -0.9285378456115723, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.0074133919551968575, |
|
"rewards/margins": 1.79477858543396, |
|
"rewards/rejected": -1.802191972732544, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1541.6219713183218, |
|
"learning_rate": 1.6132849715988494e-11, |
|
"logits/chosen": -2.7301852703094482, |
|
"logits/rejected": -2.6915435791015625, |
|
"logps/chosen": -0.9321584701538086, |
|
"logps/rejected": -0.9507132768630981, |
|
"loss": 0.4578, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.2989009618759155, |
|
"rewards/margins": 1.854230523109436, |
|
"rewards/rejected": -2.1531314849853516, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 413, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5277958785073232, |
|
"train_runtime": 6372.8883, |
|
"train_samples_per_second": 8.304, |
|
"train_steps_per_second": 0.065 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 413, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|