|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1353154938883383, |
|
"eval_steps": 500, |
|
"global_step": 64, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002114304592005286, |
|
"grad_norm": 1.5018059015274048, |
|
"learning_rate": 2.083333333333333e-08, |
|
"logits/chosen": -0.3466828167438507, |
|
"logits/rejected": -0.30099987983703613, |
|
"logps/chosen": -0.9345186948776245, |
|
"logps/rejected": -0.9117153882980347, |
|
"loss": 1.4889, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.869037389755249, |
|
"rewards/margins": -0.04560665041208267, |
|
"rewards/rejected": -1.8234307765960693, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004228609184010572, |
|
"grad_norm": 0.8093975186347961, |
|
"learning_rate": 4.166666666666666e-08, |
|
"logits/chosen": -0.4310421049594879, |
|
"logits/rejected": -0.39132067561149597, |
|
"logps/chosen": -0.8198825716972351, |
|
"logps/rejected": -0.8644211888313293, |
|
"loss": 1.376, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.6397651433944702, |
|
"rewards/margins": 0.08907715976238251, |
|
"rewards/rejected": -1.7288423776626587, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006342913776015857, |
|
"grad_norm": 0.5377389788627625, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -0.46692028641700745, |
|
"logits/rejected": -0.4649256467819214, |
|
"logps/chosen": -0.9087910652160645, |
|
"logps/rejected": -0.9648240804672241, |
|
"loss": 1.3404, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.817582130432129, |
|
"rewards/margins": 0.11206617206335068, |
|
"rewards/rejected": -1.9296481609344482, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008457218368021144, |
|
"grad_norm": 0.3221875727176666, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -0.416828453540802, |
|
"logits/rejected": -0.3584724962711334, |
|
"logps/chosen": -0.7818898558616638, |
|
"logps/rejected": -0.8170815110206604, |
|
"loss": 1.3806, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -1.5637797117233276, |
|
"rewards/margins": 0.07038339227437973, |
|
"rewards/rejected": -1.6341630220413208, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.010571522960026428, |
|
"grad_norm": 0.64655601978302, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -0.376886248588562, |
|
"logits/rejected": -0.3516141474246979, |
|
"logps/chosen": -0.8814125061035156, |
|
"logps/rejected": -1.0214396715164185, |
|
"loss": 1.2741, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7628250122070312, |
|
"rewards/margins": 0.28005433082580566, |
|
"rewards/rejected": -2.042879343032837, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012685827552031714, |
|
"grad_norm": 0.4775894582271576, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -0.4757865369319916, |
|
"logits/rejected": -0.4498941898345947, |
|
"logps/chosen": -0.8962199687957764, |
|
"logps/rejected": -0.9462199807167053, |
|
"loss": 1.364, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.7924399375915527, |
|
"rewards/margins": 0.10000008344650269, |
|
"rewards/rejected": -1.8924399614334106, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014800132144037, |
|
"grad_norm": 1.2459568977355957, |
|
"learning_rate": 1.4583333333333335e-07, |
|
"logits/chosen": -0.38895344734191895, |
|
"logits/rejected": -0.38165366649627686, |
|
"logps/chosen": -0.9025766253471375, |
|
"logps/rejected": -0.9465017318725586, |
|
"loss": 1.3898, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.805153250694275, |
|
"rewards/margins": 0.0878501906991005, |
|
"rewards/rejected": -1.8930034637451172, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.016914436736042288, |
|
"grad_norm": 0.6195729374885559, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/chosen": -0.3964853286743164, |
|
"logits/rejected": -0.377862811088562, |
|
"logps/chosen": -0.9054160118103027, |
|
"logps/rejected": -0.9605879187583923, |
|
"loss": 1.3821, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -1.8108320236206055, |
|
"rewards/margins": 0.1103438138961792, |
|
"rewards/rejected": -1.9211758375167847, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.019028741328047574, |
|
"grad_norm": 1.2074137926101685, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": -0.3729037344455719, |
|
"logits/rejected": -0.38143450021743774, |
|
"logps/chosen": -0.9328653216362, |
|
"logps/rejected": -0.9905799627304077, |
|
"loss": 1.3754, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.8657306432724, |
|
"rewards/margins": 0.11542946100234985, |
|
"rewards/rejected": -1.9811599254608154, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.021143045920052856, |
|
"grad_norm": 0.2867220640182495, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -0.4263336658477783, |
|
"logits/rejected": -0.42903271317481995, |
|
"logps/chosen": -0.8979260325431824, |
|
"logps/rejected": -0.9078099727630615, |
|
"loss": 1.4438, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.7958520650863647, |
|
"rewards/margins": 0.019767940044403076, |
|
"rewards/rejected": -1.815619945526123, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.023257350512058142, |
|
"grad_norm": 0.8363026976585388, |
|
"learning_rate": 2.2916666666666663e-07, |
|
"logits/chosen": -0.3374914526939392, |
|
"logits/rejected": -0.32399696111679077, |
|
"logps/chosen": -0.8886098861694336, |
|
"logps/rejected": -0.9484556317329407, |
|
"loss": 1.3422, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.7772197723388672, |
|
"rewards/margins": 0.11969132721424103, |
|
"rewards/rejected": -1.8969112634658813, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.025371655104063428, |
|
"grad_norm": 0.5406804084777832, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.42844679951667786, |
|
"logits/rejected": -0.37984615564346313, |
|
"logps/chosen": -0.861629843711853, |
|
"logps/rejected": -0.8968492150306702, |
|
"loss": 1.3922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.723259687423706, |
|
"rewards/margins": 0.07043875753879547, |
|
"rewards/rejected": -1.7936984300613403, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.027485959696068714, |
|
"grad_norm": 0.9919329285621643, |
|
"learning_rate": 2.708333333333333e-07, |
|
"logits/chosen": -0.36495402455329895, |
|
"logits/rejected": -0.3249490261077881, |
|
"logps/chosen": -0.8502095937728882, |
|
"logps/rejected": -0.8470643758773804, |
|
"loss": 1.4334, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.7004191875457764, |
|
"rewards/margins": -0.006290358491241932, |
|
"rewards/rejected": -1.6941287517547607, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.029600264288074, |
|
"grad_norm": 0.5477162003517151, |
|
"learning_rate": 2.916666666666667e-07, |
|
"logits/chosen": -0.4155704081058502, |
|
"logits/rejected": -0.39535820484161377, |
|
"logps/chosen": -1.0430240631103516, |
|
"logps/rejected": -1.1318373680114746, |
|
"loss": 1.3533, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -2.086048126220703, |
|
"rewards/margins": 0.17762640118598938, |
|
"rewards/rejected": -2.263674736022949, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.031714568880079286, |
|
"grad_norm": 0.26530712842941284, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -0.4810572564601898, |
|
"logits/rejected": -0.42454615235328674, |
|
"logps/chosen": -0.8741041421890259, |
|
"logps/rejected": -0.9494178295135498, |
|
"loss": 1.3655, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7482082843780518, |
|
"rewards/margins": 0.15062758326530457, |
|
"rewards/rejected": -1.8988356590270996, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.033828873472084575, |
|
"grad_norm": 0.9272629618644714, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": -0.4440098702907562, |
|
"logits/rejected": -0.3930297791957855, |
|
"logps/chosen": -0.8473359942436218, |
|
"logps/rejected": -0.9369213581085205, |
|
"loss": 1.3248, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.6946719884872437, |
|
"rewards/margins": 0.17917080223560333, |
|
"rewards/rejected": -1.873842716217041, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03594317806408986, |
|
"grad_norm": 0.5912418961524963, |
|
"learning_rate": 3.541666666666667e-07, |
|
"logits/chosen": -0.3838099539279938, |
|
"logits/rejected": -0.3507584035396576, |
|
"logps/chosen": -0.8888350129127502, |
|
"logps/rejected": -0.9361770749092102, |
|
"loss": 1.383, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -1.7776700258255005, |
|
"rewards/margins": 0.0946839451789856, |
|
"rewards/rejected": -1.8723541498184204, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03805748265609515, |
|
"grad_norm": 0.6536504030227661, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -0.3581697940826416, |
|
"logits/rejected": -0.3620460629463196, |
|
"logps/chosen": -0.8519617319107056, |
|
"logps/rejected": -0.9022184610366821, |
|
"loss": 1.3841, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.7039234638214111, |
|
"rewards/margins": 0.10051343590021133, |
|
"rewards/rejected": -1.8044369220733643, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04017178724810043, |
|
"grad_norm": 0.3433632552623749, |
|
"learning_rate": 3.958333333333333e-07, |
|
"logits/chosen": -0.37887442111968994, |
|
"logits/rejected": -0.37543320655822754, |
|
"logps/chosen": -0.9464104175567627, |
|
"logps/rejected": -1.0017329454421997, |
|
"loss": 1.3649, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.8928208351135254, |
|
"rewards/margins": 0.11064518243074417, |
|
"rewards/rejected": -2.0034658908843994, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04228609184010571, |
|
"grad_norm": 0.9764007329940796, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.44110679626464844, |
|
"logits/rejected": -0.4280649721622467, |
|
"logps/chosen": -0.9046768546104431, |
|
"logps/rejected": -1.0464633703231812, |
|
"loss": 1.2592, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8093537092208862, |
|
"rewards/margins": 0.2835729420185089, |
|
"rewards/rejected": -2.0929267406463623, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.044400396432111, |
|
"grad_norm": 1.8563830852508545, |
|
"learning_rate": 4.375e-07, |
|
"logits/chosen": -0.45183491706848145, |
|
"logits/rejected": -0.42935287952423096, |
|
"logps/chosen": -0.9043138027191162, |
|
"logps/rejected": -0.9462392926216125, |
|
"loss": 1.3784, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8086276054382324, |
|
"rewards/margins": 0.08385094255208969, |
|
"rewards/rejected": -1.892478585243225, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.046514701024116284, |
|
"grad_norm": 1.3473299741744995, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"logits/chosen": -0.37855517864227295, |
|
"logits/rejected": -0.34429043531417847, |
|
"logps/chosen": -0.9284683465957642, |
|
"logps/rejected": -0.9454050064086914, |
|
"loss": 1.4346, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -1.8569366931915283, |
|
"rewards/margins": 0.03387312963604927, |
|
"rewards/rejected": -1.8908100128173828, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04862900561612157, |
|
"grad_norm": 0.940831184387207, |
|
"learning_rate": 4.791666666666667e-07, |
|
"logits/chosen": -0.39172160625457764, |
|
"logits/rejected": -0.3695780634880066, |
|
"logps/chosen": -0.9314202666282654, |
|
"logps/rejected": -1.020229697227478, |
|
"loss": 1.3322, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.8628405332565308, |
|
"rewards/margins": 0.17761869728565216, |
|
"rewards/rejected": -2.040459394454956, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.050743310208126856, |
|
"grad_norm": 0.5783158540725708, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.4958629608154297, |
|
"logits/rejected": -0.4257377088069916, |
|
"logps/chosen": -0.9379237294197083, |
|
"logps/rejected": -0.9415461421012878, |
|
"loss": 1.441, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.8758474588394165, |
|
"rewards/margins": 0.0072449808940291405, |
|
"rewards/rejected": -1.8830922842025757, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.052857614800132145, |
|
"grad_norm": 1.4209853410720825, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -0.36407172679901123, |
|
"logits/rejected": -0.3331725299358368, |
|
"logps/chosen": -0.9192589521408081, |
|
"logps/rejected": -0.9595308899879456, |
|
"loss": 1.3994, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.8385179042816162, |
|
"rewards/margins": 0.080544114112854, |
|
"rewards/rejected": -1.9190617799758911, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05497191939213743, |
|
"grad_norm": 0.6310216188430786, |
|
"learning_rate": 5.416666666666666e-07, |
|
"logits/chosen": -0.41772690415382385, |
|
"logits/rejected": -0.36565953493118286, |
|
"logps/chosen": -0.8052878379821777, |
|
"logps/rejected": -0.8673746585845947, |
|
"loss": 1.3356, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.6105756759643555, |
|
"rewards/margins": 0.12417369335889816, |
|
"rewards/rejected": -1.7347493171691895, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05708622398414272, |
|
"grad_norm": 1.2933462858200073, |
|
"learning_rate": 5.625e-07, |
|
"logits/chosen": -0.4482795000076294, |
|
"logits/rejected": -0.39409321546554565, |
|
"logps/chosen": -0.8339261412620544, |
|
"logps/rejected": -0.8675202131271362, |
|
"loss": 1.3739, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -1.6678522825241089, |
|
"rewards/margins": 0.06718815863132477, |
|
"rewards/rejected": -1.7350404262542725, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.059200528576148, |
|
"grad_norm": 0.5808025002479553, |
|
"learning_rate": 5.833333333333334e-07, |
|
"logits/chosen": -0.37116044759750366, |
|
"logits/rejected": -0.3478051722049713, |
|
"logps/chosen": -0.8950318694114685, |
|
"logps/rejected": -0.9756672978401184, |
|
"loss": 1.3505, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.790063738822937, |
|
"rewards/margins": 0.1612708568572998, |
|
"rewards/rejected": -1.9513345956802368, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06131483316815329, |
|
"grad_norm": 1.0569533109664917, |
|
"learning_rate": 6.041666666666666e-07, |
|
"logits/chosen": -0.421148419380188, |
|
"logits/rejected": -0.38443076610565186, |
|
"logps/chosen": -0.8021283745765686, |
|
"logps/rejected": -0.8370179533958435, |
|
"loss": 1.3916, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -1.6042567491531372, |
|
"rewards/margins": 0.06977920234203339, |
|
"rewards/rejected": -1.674035906791687, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06342913776015857, |
|
"grad_norm": 0.42577147483825684, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": -0.4429818391799927, |
|
"logits/rejected": -0.3524704575538635, |
|
"logps/chosen": -0.8916822671890259, |
|
"logps/rejected": -0.8985542058944702, |
|
"loss": 1.4321, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -1.7833645343780518, |
|
"rewards/margins": 0.01374388113617897, |
|
"rewards/rejected": -1.7971084117889404, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06554344235216386, |
|
"grad_norm": 1.0056904554367065, |
|
"learning_rate": 6.458333333333333e-07, |
|
"logits/chosen": -0.376451700925827, |
|
"logits/rejected": -0.342519074678421, |
|
"logps/chosen": -0.9038617014884949, |
|
"logps/rejected": -0.953092634677887, |
|
"loss": 1.398, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -1.8077234029769897, |
|
"rewards/margins": 0.09846188127994537, |
|
"rewards/rejected": -1.906185269355774, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06765774694416915, |
|
"grad_norm": 0.5494012236595154, |
|
"learning_rate": 6.666666666666666e-07, |
|
"logits/chosen": -0.3459138870239258, |
|
"logits/rejected": -0.3590989410877228, |
|
"logps/chosen": -0.8274999260902405, |
|
"logps/rejected": -0.8776509761810303, |
|
"loss": 1.363, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.654999852180481, |
|
"rewards/margins": 0.1003020703792572, |
|
"rewards/rejected": -1.7553019523620605, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06977205153617443, |
|
"grad_norm": 0.693267822265625, |
|
"learning_rate": 6.875e-07, |
|
"logits/chosen": -0.40053680539131165, |
|
"logits/rejected": -0.37323904037475586, |
|
"logps/chosen": -0.8255244493484497, |
|
"logps/rejected": -0.8658804893493652, |
|
"loss": 1.3712, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.6510488986968994, |
|
"rewards/margins": 0.08071210980415344, |
|
"rewards/rejected": -1.7317609786987305, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07188635612817972, |
|
"grad_norm": 2.213238000869751, |
|
"learning_rate": 7.083333333333334e-07, |
|
"logits/chosen": -0.40097948908805847, |
|
"logits/rejected": -0.38190510869026184, |
|
"logps/chosen": -0.9122671484947205, |
|
"logps/rejected": -0.9549552798271179, |
|
"loss": 1.36, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.824534296989441, |
|
"rewards/margins": 0.0853763073682785, |
|
"rewards/rejected": -1.9099105596542358, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.074000660720185, |
|
"grad_norm": 0.6859830021858215, |
|
"learning_rate": 7.291666666666666e-07, |
|
"logits/chosen": -0.42501094937324524, |
|
"logits/rejected": -0.42549416422843933, |
|
"logps/chosen": -1.0008373260498047, |
|
"logps/rejected": -1.1157118082046509, |
|
"loss": 1.3294, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -2.0016746520996094, |
|
"rewards/margins": 0.22974897921085358, |
|
"rewards/rejected": -2.2314236164093018, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0761149653121903, |
|
"grad_norm": 0.6468721628189087, |
|
"learning_rate": 7.5e-07, |
|
"logits/chosen": -0.36494994163513184, |
|
"logits/rejected": -0.30433908104896545, |
|
"logps/chosen": -0.9062094688415527, |
|
"logps/rejected": -0.920263409614563, |
|
"loss": 1.4312, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -1.8124189376831055, |
|
"rewards/margins": 0.02810765616595745, |
|
"rewards/rejected": -1.840526819229126, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07822926990419557, |
|
"grad_norm": 0.5085556507110596, |
|
"learning_rate": 7.708333333333333e-07, |
|
"logits/chosen": -0.4677881598472595, |
|
"logits/rejected": -0.456132709980011, |
|
"logps/chosen": -1.0101865530014038, |
|
"logps/rejected": -1.0429682731628418, |
|
"loss": 1.4132, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -2.0203731060028076, |
|
"rewards/margins": 0.06556359678506851, |
|
"rewards/rejected": -2.0859365463256836, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08034357449620086, |
|
"grad_norm": 0.23813335597515106, |
|
"learning_rate": 7.916666666666666e-07, |
|
"logits/chosen": -0.3991190791130066, |
|
"logits/rejected": -0.3664044141769409, |
|
"logps/chosen": -0.9578174352645874, |
|
"logps/rejected": -0.9229263067245483, |
|
"loss": 1.4824, |
|
"rewards/accuracies": 0.4609375, |
|
"rewards/chosen": -1.9156348705291748, |
|
"rewards/margins": -0.06978224962949753, |
|
"rewards/rejected": -1.8458526134490967, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08245787908820615, |
|
"grad_norm": 0.587037980556488, |
|
"learning_rate": 8.125e-07, |
|
"logits/chosen": -0.37554049491882324, |
|
"logits/rejected": -0.36305734515190125, |
|
"logps/chosen": -0.8503091931343079, |
|
"logps/rejected": -0.864615261554718, |
|
"loss": 1.4086, |
|
"rewards/accuracies": 0.4296875, |
|
"rewards/chosen": -1.7006183862686157, |
|
"rewards/margins": 0.028611989691853523, |
|
"rewards/rejected": -1.729230523109436, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08457218368021142, |
|
"grad_norm": 0.4172501862049103, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -0.4405443072319031, |
|
"logits/rejected": -0.41723060607910156, |
|
"logps/chosen": -0.8502858877182007, |
|
"logps/rejected": -0.9114271402359009, |
|
"loss": 1.3446, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.7005717754364014, |
|
"rewards/margins": 0.12228240817785263, |
|
"rewards/rejected": -1.8228542804718018, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08668648827221671, |
|
"grad_norm": 0.9275372624397278, |
|
"learning_rate": 8.541666666666666e-07, |
|
"logits/chosen": -0.4200601577758789, |
|
"logits/rejected": -0.3478623628616333, |
|
"logps/chosen": -0.892408013343811, |
|
"logps/rejected": -0.9276402592658997, |
|
"loss": 1.3887, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -1.784816026687622, |
|
"rewards/margins": 0.07046431303024292, |
|
"rewards/rejected": -1.8552805185317993, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.088800792864222, |
|
"grad_norm": 0.7317383289337158, |
|
"learning_rate": 8.75e-07, |
|
"logits/chosen": -0.37675267457962036, |
|
"logits/rejected": -0.33540332317352295, |
|
"logps/chosen": -0.7866061925888062, |
|
"logps/rejected": -0.824250340461731, |
|
"loss": 1.3837, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.5732123851776123, |
|
"rewards/margins": 0.07528844475746155, |
|
"rewards/rejected": -1.648500680923462, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09091509745622729, |
|
"grad_norm": 0.9452736973762512, |
|
"learning_rate": 8.958333333333334e-07, |
|
"logits/chosen": -0.4662383198738098, |
|
"logits/rejected": -0.4447881579399109, |
|
"logps/chosen": -0.9490666389465332, |
|
"logps/rejected": -1.0112388134002686, |
|
"loss": 1.3412, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.8981332778930664, |
|
"rewards/margins": 0.12434446066617966, |
|
"rewards/rejected": -2.022477626800537, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09302940204823257, |
|
"grad_norm": 0.2848323881626129, |
|
"learning_rate": 9.166666666666665e-07, |
|
"logits/chosen": -0.41404005885124207, |
|
"logits/rejected": -0.3944583535194397, |
|
"logps/chosen": -0.8224930167198181, |
|
"logps/rejected": -0.8416361808776855, |
|
"loss": 1.4027, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.6449860334396362, |
|
"rewards/margins": 0.038286346942186356, |
|
"rewards/rejected": -1.683272361755371, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09514370664023786, |
|
"grad_norm": 0.7165678143501282, |
|
"learning_rate": 9.374999999999999e-07, |
|
"logits/chosen": -0.40475326776504517, |
|
"logits/rejected": -0.3559921383857727, |
|
"logps/chosen": -0.8070214986801147, |
|
"logps/rejected": -0.8993593454360962, |
|
"loss": 1.3148, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.6140429973602295, |
|
"rewards/margins": 0.18467575311660767, |
|
"rewards/rejected": -1.7987186908721924, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09725801123224315, |
|
"grad_norm": 0.4779021739959717, |
|
"learning_rate": 9.583333333333334e-07, |
|
"logits/chosen": -0.4171525835990906, |
|
"logits/rejected": -0.42166149616241455, |
|
"logps/chosen": -0.7872560024261475, |
|
"logps/rejected": -0.8496187925338745, |
|
"loss": 1.3356, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.574512004852295, |
|
"rewards/margins": 0.12472567707300186, |
|
"rewards/rejected": -1.699237585067749, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09937231582424844, |
|
"grad_norm": 0.7870219349861145, |
|
"learning_rate": 9.791666666666667e-07, |
|
"logits/chosen": -0.3734116554260254, |
|
"logits/rejected": -0.32778748869895935, |
|
"logps/chosen": -0.7842286825180054, |
|
"logps/rejected": -0.8161548972129822, |
|
"loss": 1.3647, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.5684573650360107, |
|
"rewards/margins": 0.06385258585214615, |
|
"rewards/rejected": -1.6323097944259644, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.10148662041625371, |
|
"grad_norm": 0.2597256600856781, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.4355677664279938, |
|
"logits/rejected": -0.38983187079429626, |
|
"logps/chosen": -0.8787693977355957, |
|
"logps/rejected": -0.9383041262626648, |
|
"loss": 1.35, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.7575387954711914, |
|
"rewards/margins": 0.11906948685646057, |
|
"rewards/rejected": -1.8766082525253296, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.103600925008259, |
|
"grad_norm": 0.9942799210548401, |
|
"learning_rate": 9.999862751990697e-07, |
|
"logits/chosen": -0.4244321882724762, |
|
"logits/rejected": -0.4366786777973175, |
|
"logps/chosen": -0.7910157442092896, |
|
"logps/rejected": -0.8630884885787964, |
|
"loss": 1.3166, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.582031488418579, |
|
"rewards/margins": 0.14414538443088531, |
|
"rewards/rejected": -1.7261769771575928, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.10571522960026429, |
|
"grad_norm": 0.5333903431892395, |
|
"learning_rate": 9.999451015497595e-07, |
|
"logits/chosen": -0.389942467212677, |
|
"logits/rejected": -0.36674585938453674, |
|
"logps/chosen": -0.7312074899673462, |
|
"logps/rejected": -0.7289648652076721, |
|
"loss": 1.4225, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -1.4624149799346924, |
|
"rewards/margins": -0.004485193639993668, |
|
"rewards/rejected": -1.4579297304153442, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10782953419226958, |
|
"grad_norm": 0.5712242722511292, |
|
"learning_rate": 9.9987648131247e-07, |
|
"logits/chosen": -0.4622853994369507, |
|
"logits/rejected": -0.3728552460670471, |
|
"logps/chosen": -0.8764299750328064, |
|
"logps/rejected": -0.869678795337677, |
|
"loss": 1.4542, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.7528599500656128, |
|
"rewards/margins": -0.013502337038516998, |
|
"rewards/rejected": -1.739357590675354, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.10994383878427486, |
|
"grad_norm": 0.2586441934108734, |
|
"learning_rate": 9.99780418254397e-07, |
|
"logits/chosen": -0.37249019742012024, |
|
"logits/rejected": -0.3998304605484009, |
|
"logps/chosen": -0.8435611724853516, |
|
"logps/rejected": -0.9359882473945618, |
|
"loss": 1.3057, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6871223449707031, |
|
"rewards/margins": 0.18485431373119354, |
|
"rewards/rejected": -1.8719764947891235, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11205814337628014, |
|
"grad_norm": 1.0829113721847534, |
|
"learning_rate": 9.996569176493268e-07, |
|
"logits/chosen": -0.47697725892066956, |
|
"logits/rejected": -0.4208195209503174, |
|
"logps/chosen": -0.8014968037605286, |
|
"logps/rejected": -0.8703804612159729, |
|
"loss": 1.3523, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.6029936075210571, |
|
"rewards/margins": 0.1377674788236618, |
|
"rewards/rejected": -1.7407609224319458, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.11417244796828543, |
|
"grad_norm": 0.5523208379745483, |
|
"learning_rate": 9.995059862773438e-07, |
|
"logits/chosen": -0.40533363819122314, |
|
"logits/rejected": -0.36801978945732117, |
|
"logps/chosen": -0.7641825675964355, |
|
"logps/rejected": -0.8168596029281616, |
|
"loss": 1.3692, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.528365135192871, |
|
"rewards/margins": 0.10535416752099991, |
|
"rewards/rejected": -1.6337192058563232, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11628675256029072, |
|
"grad_norm": 0.614101767539978, |
|
"learning_rate": 9.993276324244605e-07, |
|
"logits/chosen": -0.4476906955242157, |
|
"logits/rejected": -0.40396648645401, |
|
"logps/chosen": -0.8706808090209961, |
|
"logps/rejected": -0.9221430420875549, |
|
"loss": 1.3787, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.7413616180419922, |
|
"rewards/margins": 0.10292442888021469, |
|
"rewards/rejected": -1.8442860841751099, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.118401057152296, |
|
"grad_norm": 0.3428778052330017, |
|
"learning_rate": 9.991218658821608e-07, |
|
"logits/chosen": -0.31709593534469604, |
|
"logits/rejected": -0.2760937213897705, |
|
"logps/chosen": -0.842248797416687, |
|
"logps/rejected": -0.8068034648895264, |
|
"loss": 1.498, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.684497594833374, |
|
"rewards/margins": -0.07089066505432129, |
|
"rewards/rejected": -1.6136069297790527, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.12051536174430129, |
|
"grad_norm": 0.6877723932266235, |
|
"learning_rate": 9.988886979468643e-07, |
|
"logits/chosen": -0.41800016164779663, |
|
"logits/rejected": -0.4011584222316742, |
|
"logps/chosen": -0.7845420837402344, |
|
"logps/rejected": -0.834447979927063, |
|
"loss": 1.3491, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.5690841674804688, |
|
"rewards/margins": 0.09981165081262589, |
|
"rewards/rejected": -1.668895959854126, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.12262966633630658, |
|
"grad_norm": 0.9649701714515686, |
|
"learning_rate": 9.98628141419305e-07, |
|
"logits/chosen": -0.4253537058830261, |
|
"logits/rejected": -0.4305458962917328, |
|
"logps/chosen": -0.86476731300354, |
|
"logps/rejected": -0.9080386161804199, |
|
"loss": 1.3639, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.72953462600708, |
|
"rewards/margins": 0.08654248714447021, |
|
"rewards/rejected": -1.8160772323608398, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.12474397092831185, |
|
"grad_norm": 1.3779780864715576, |
|
"learning_rate": 9.98340210603829e-07, |
|
"logits/chosen": -0.39970022439956665, |
|
"logits/rejected": -0.441428005695343, |
|
"logps/chosen": -0.8662775158882141, |
|
"logps/rejected": -0.9646260738372803, |
|
"loss": 1.3001, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -1.7325550317764282, |
|
"rewards/margins": 0.19669723510742188, |
|
"rewards/rejected": -1.9292521476745605, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.12685827552031714, |
|
"grad_norm": 0.5366966724395752, |
|
"learning_rate": 9.980249213076084e-07, |
|
"logits/chosen": -0.37770116329193115, |
|
"logits/rejected": -0.35231757164001465, |
|
"logps/chosen": -0.8165755867958069, |
|
"logps/rejected": -0.8619179129600525, |
|
"loss": 1.3699, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.6331511735916138, |
|
"rewards/margins": 0.09068439900875092, |
|
"rewards/rejected": -1.723835825920105, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12897258011232243, |
|
"grad_norm": 0.36810922622680664, |
|
"learning_rate": 9.976822908397748e-07, |
|
"logits/chosen": -0.4224976897239685, |
|
"logits/rejected": -0.41758257150650024, |
|
"logps/chosen": -0.8445641994476318, |
|
"logps/rejected": -0.9393664598464966, |
|
"loss": 1.3193, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.6891283988952637, |
|
"rewards/margins": 0.18960458040237427, |
|
"rewards/rejected": -1.8787329196929932, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.13108688470432772, |
|
"grad_norm": 0.6838279366493225, |
|
"learning_rate": 9.97312338010468e-07, |
|
"logits/chosen": -0.4168627858161926, |
|
"logits/rejected": -0.36115381121635437, |
|
"logps/chosen": -0.8370552659034729, |
|
"logps/rejected": -0.8352169394493103, |
|
"loss": 1.4284, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.6741105318069458, |
|
"rewards/margins": -0.0036766715347766876, |
|
"rewards/rejected": -1.6704338788986206, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.133201189296333, |
|
"grad_norm": 0.39330533146858215, |
|
"learning_rate": 9.969150831298037e-07, |
|
"logits/chosen": -0.4558233618736267, |
|
"logits/rejected": -0.4025765061378479, |
|
"logps/chosen": -0.826255738735199, |
|
"logps/rejected": -0.894213080406189, |
|
"loss": 1.3485, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.652511477470398, |
|
"rewards/margins": 0.13591471314430237, |
|
"rewards/rejected": -1.788426160812378, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1353154938883383, |
|
"grad_norm": 0.6055929660797119, |
|
"learning_rate": 9.964905480067584e-07, |
|
"logits/chosen": -0.459463506937027, |
|
"logits/rejected": -0.42943331599235535, |
|
"logps/chosen": -0.7901928424835205, |
|
"logps/rejected": -0.7964221239089966, |
|
"loss": 1.4057, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -1.580385684967041, |
|
"rewards/margins": 0.012458762153983116, |
|
"rewards/rejected": -1.5928442478179932, |
|
"step": 64 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 472, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 64, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|