|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998424948810837, |
|
"eval_steps": 100, |
|
"global_step": 3174, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.5723270440251572e-09, |
|
"logits/chosen": -1.3876760005950928, |
|
"logits/rejected": -1.4584133625030518, |
|
"logps/chosen": -148.11717224121094, |
|
"logps/rejected": -197.28189086914062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 1.5723270440251573e-08, |
|
"logits/chosen": -1.2969517707824707, |
|
"logits/rejected": -1.0069656372070312, |
|
"logps/chosen": -190.4855499267578, |
|
"logps/rejected": -182.0135498046875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 0.0011108842445537448, |
|
"rewards/margins": 0.001312906388193369, |
|
"rewards/margins_max": 0.0032973522320389748, |
|
"rewards/margins_min": -0.0006715393392369151, |
|
"rewards/margins_std": 0.0028064302168786526, |
|
"rewards/rejected": -0.00020202209998387843, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 3.1446540880503146e-08, |
|
"logits/chosen": -1.36593496799469, |
|
"logits/rejected": -1.0528085231781006, |
|
"logps/chosen": -225.4935760498047, |
|
"logps/rejected": -200.0979766845703, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 7.484816160285845e-05, |
|
"rewards/margins": -0.0001347160286968574, |
|
"rewards/margins_max": 0.0016663169953972101, |
|
"rewards/margins_min": -0.0019357489654794335, |
|
"rewards/margins_std": 0.0025470454711467028, |
|
"rewards/rejected": 0.00020956425578333437, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.7169811320754715e-08, |
|
"logits/chosen": -1.26302170753479, |
|
"logits/rejected": -0.982827365398407, |
|
"logps/chosen": -180.48269653320312, |
|
"logps/rejected": -184.57960510253906, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0005473994533531368, |
|
"rewards/margins": 0.0005724715883843601, |
|
"rewards/margins_max": 0.002282569883391261, |
|
"rewards/margins_min": -0.0011376264737918973, |
|
"rewards/margins_std": 0.002418444026261568, |
|
"rewards/rejected": -0.0011198710417374969, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 6.289308176100629e-08, |
|
"logits/chosen": -1.4589287042617798, |
|
"logits/rejected": -1.1574287414550781, |
|
"logps/chosen": -225.4607696533203, |
|
"logps/rejected": -276.73675537109375, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00039744950481690466, |
|
"rewards/margins": -0.00030673606670461595, |
|
"rewards/margins_max": 0.0013146628625690937, |
|
"rewards/margins_min": -0.0019281348213553429, |
|
"rewards/margins_std": 0.0022930041886866093, |
|
"rewards/rejected": 0.0007041855715215206, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 7.861635220125786e-08, |
|
"logits/chosen": -1.3671009540557861, |
|
"logits/rejected": -0.8631851077079773, |
|
"logps/chosen": -331.6417236328125, |
|
"logps/rejected": -205.7646026611328, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0002600564039312303, |
|
"rewards/margins": -0.0007569913868792355, |
|
"rewards/margins_max": 0.0010670910123735666, |
|
"rewards/margins_min": -0.002581073669716716, |
|
"rewards/margins_std": 0.002579641994088888, |
|
"rewards/rejected": 0.0010170477908104658, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 9.433962264150943e-08, |
|
"logits/chosen": -1.1948202848434448, |
|
"logits/rejected": -1.0117332935333252, |
|
"logps/chosen": -203.6728515625, |
|
"logps/rejected": -264.63153076171875, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.000772724102716893, |
|
"rewards/margins": 1.4207902495400049e-05, |
|
"rewards/margins_max": 0.0016571771120652556, |
|
"rewards/margins_min": -0.001628761412575841, |
|
"rewards/margins_std": 0.0023235089611262083, |
|
"rewards/rejected": 0.0007585162529721856, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 1.10062893081761e-07, |
|
"logits/chosen": -1.4154024124145508, |
|
"logits/rejected": -1.0937511920928955, |
|
"logps/chosen": -218.91259765625, |
|
"logps/rejected": -224.9219207763672, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00017823689267970622, |
|
"rewards/margins": -0.00020709517411887646, |
|
"rewards/margins_max": 0.0016052055871114135, |
|
"rewards/margins_min": -0.002019395586103201, |
|
"rewards/margins_std": 0.00256298016756773, |
|
"rewards/rejected": 0.00038533215411007404, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.2578616352201258e-07, |
|
"logits/chosen": -1.2727240324020386, |
|
"logits/rejected": -0.9936261177062988, |
|
"logps/chosen": -285.10943603515625, |
|
"logps/rejected": -266.4510192871094, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0009560451726429164, |
|
"rewards/margins": 0.0006419096025638282, |
|
"rewards/margins_max": 0.0029980712570250034, |
|
"rewards/margins_min": -0.0017142522847279906, |
|
"rewards/margins_std": 0.003332116873934865, |
|
"rewards/rejected": 0.0003141355118714273, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 1.4150943396226414e-07, |
|
"logits/chosen": -1.4589568376541138, |
|
"logits/rejected": -1.1692708730697632, |
|
"logps/chosen": -212.2246551513672, |
|
"logps/rejected": -219.21646118164062, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0005518359830603004, |
|
"rewards/margins": 0.001025562291033566, |
|
"rewards/margins_max": 0.0023937453515827656, |
|
"rewards/margins_min": -0.0003426209441386163, |
|
"rewards/margins_std": 0.001934903091751039, |
|
"rewards/rejected": -0.00047372624976560473, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.5723270440251572e-07, |
|
"logits/chosen": -1.405853033065796, |
|
"logits/rejected": -0.9023151397705078, |
|
"logps/chosen": -257.5167236328125, |
|
"logps/rejected": -205.4027862548828, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0005634050467051566, |
|
"rewards/margins": 0.001445975387468934, |
|
"rewards/margins_max": 0.0039651584811508656, |
|
"rewards/margins_min": -0.001073207939043641, |
|
"rewards/margins_std": 0.0035626632161438465, |
|
"rewards/rejected": -0.0008825702825561166, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 1.7295597484276728e-07, |
|
"logits/chosen": -1.2503092288970947, |
|
"logits/rejected": -0.9771049618721008, |
|
"logps/chosen": -230.6888427734375, |
|
"logps/rejected": -189.9393310546875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.000937645963858813, |
|
"rewards/margins": 0.001897513517178595, |
|
"rewards/margins_max": 0.0035337067674845457, |
|
"rewards/margins_min": 0.00026132012135349214, |
|
"rewards/margins_std": 0.0023139265831559896, |
|
"rewards/rejected": -0.0009598674369044602, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 1.8867924528301886e-07, |
|
"logits/chosen": -1.413317084312439, |
|
"logits/rejected": -1.0483345985412598, |
|
"logps/chosen": -195.40811157226562, |
|
"logps/rejected": -186.1103515625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0013014640426263213, |
|
"rewards/margins": 0.0010003356728702784, |
|
"rewards/margins_max": 0.0033742673695087433, |
|
"rewards/margins_min": -0.0013735961401835084, |
|
"rewards/margins_std": 0.003357246518135071, |
|
"rewards/rejected": 0.0003011283988598734, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 2.0440251572327044e-07, |
|
"logits/chosen": -1.1766637563705444, |
|
"logits/rejected": -0.9444602131843567, |
|
"logps/chosen": -219.5814666748047, |
|
"logps/rejected": -248.6021728515625, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0013613433111459017, |
|
"rewards/margins": 0.0008185977349057794, |
|
"rewards/margins_max": 0.0035369223915040493, |
|
"rewards/margins_min": -0.0018997270381078124, |
|
"rewards/margins_std": 0.0038442914374172688, |
|
"rewards/rejected": 0.0005427456344477832, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 2.20125786163522e-07, |
|
"logits/chosen": -1.1843626499176025, |
|
"logits/rejected": -0.9615445137023926, |
|
"logps/chosen": -267.6846923828125, |
|
"logps/rejected": -216.41455078125, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0015651207650080323, |
|
"rewards/margins": 0.0013728371122851968, |
|
"rewards/margins_max": 0.003893634770065546, |
|
"rewards/margins_min": -0.0011479605454951525, |
|
"rewards/margins_std": 0.0035649463534355164, |
|
"rewards/rejected": 0.00019228360906708986, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 2.3584905660377358e-07, |
|
"logits/chosen": -1.3410217761993408, |
|
"logits/rejected": -0.8768698573112488, |
|
"logps/chosen": -305.0233154296875, |
|
"logps/rejected": -234.83407592773438, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0015008506597951055, |
|
"rewards/margins": 0.002513662213459611, |
|
"rewards/margins_max": 0.005906062666326761, |
|
"rewards/margins_min": -0.0008787383558228612, |
|
"rewards/margins_std": 0.004797579254955053, |
|
"rewards/rejected": -0.0010128116700798273, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 2.5157232704402517e-07, |
|
"logits/chosen": -1.3618042469024658, |
|
"logits/rejected": -1.1711941957473755, |
|
"logps/chosen": -168.73251342773438, |
|
"logps/rejected": -229.12173461914062, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0013464975636452436, |
|
"rewards/margins": 0.0017303951317444444, |
|
"rewards/margins_max": 0.002993419300764799, |
|
"rewards/margins_min": 0.000467371050035581, |
|
"rewards/margins_std": 0.0017861860105767846, |
|
"rewards/rejected": -0.0003838978009298444, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 2.672955974842767e-07, |
|
"logits/chosen": -1.2365471124649048, |
|
"logits/rejected": -0.9317380785942078, |
|
"logps/chosen": -220.8829345703125, |
|
"logps/rejected": -198.69509887695312, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0018165509682148695, |
|
"rewards/margins": 0.0023886661510914564, |
|
"rewards/margins_max": 0.004315841477364302, |
|
"rewards/margins_min": 0.0004614906501956284, |
|
"rewards/margins_std": 0.0027254377491772175, |
|
"rewards/rejected": -0.0005721148918382823, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 2.830188679245283e-07, |
|
"logits/chosen": -1.3770387172698975, |
|
"logits/rejected": -1.0459026098251343, |
|
"logps/chosen": -213.62649536132812, |
|
"logps/rejected": -216.0526580810547, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0032491025049239397, |
|
"rewards/margins": 0.004016959108412266, |
|
"rewards/margins_max": 0.006775864399969578, |
|
"rewards/margins_min": 0.0012580546317622066, |
|
"rewards/margins_std": 0.0039016795344650745, |
|
"rewards/rejected": -0.0007678564870730042, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 2.9874213836477983e-07, |
|
"logits/chosen": -1.280879259109497, |
|
"logits/rejected": -1.0743911266326904, |
|
"logps/chosen": -196.5890655517578, |
|
"logps/rejected": -215.959228515625, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.003082216251641512, |
|
"rewards/margins": 0.0024519709404557943, |
|
"rewards/margins_max": 0.004635250195860863, |
|
"rewards/margins_min": 0.0002686919760890305, |
|
"rewards/margins_std": 0.003087623044848442, |
|
"rewards/rejected": 0.0006302451947703958, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 3.1446540880503144e-07, |
|
"logits/chosen": -1.312201738357544, |
|
"logits/rejected": -1.1042929887771606, |
|
"logps/chosen": -219.0549774169922, |
|
"logps/rejected": -207.017822265625, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.001382762915454805, |
|
"rewards/margins": 0.002051582094281912, |
|
"rewards/margins_max": 0.0041840835474431515, |
|
"rewards/margins_min": -8.09192206361331e-05, |
|
"rewards/margins_std": 0.0030158127192407846, |
|
"rewards/rejected": -0.0006688194698654115, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 3.30188679245283e-07, |
|
"logits/chosen": -1.4287524223327637, |
|
"logits/rejected": -1.090522050857544, |
|
"logps/chosen": -237.6526336669922, |
|
"logps/rejected": -253.42056274414062, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.003422607434913516, |
|
"rewards/margins": 0.0037254388444125652, |
|
"rewards/margins_max": 0.007606147322803736, |
|
"rewards/margins_min": -0.00015526966308243573, |
|
"rewards/margins_std": 0.005488150753080845, |
|
"rewards/rejected": -0.00030283164232969284, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 3.4591194968553456e-07, |
|
"logits/chosen": -1.3905646800994873, |
|
"logits/rejected": -1.1243839263916016, |
|
"logps/chosen": -275.39739990234375, |
|
"logps/rejected": -198.20419311523438, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0038289937656372786, |
|
"rewards/margins": 0.0037508513778448105, |
|
"rewards/margins_max": 0.00589752709493041, |
|
"rewards/margins_min": 0.0016041755443438888, |
|
"rewards/margins_std": 0.0030358582735061646, |
|
"rewards/rejected": 7.814211130607873e-05, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 3.616352201257861e-07, |
|
"logits/chosen": -1.4099429845809937, |
|
"logits/rejected": -1.1149197816848755, |
|
"logps/chosen": -253.6432647705078, |
|
"logps/rejected": -201.7845458984375, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.004002997186034918, |
|
"rewards/margins": 0.004339634440839291, |
|
"rewards/margins_max": 0.0070360577665269375, |
|
"rewards/margins_min": 0.0016432113479822874, |
|
"rewards/margins_std": 0.003813318442553282, |
|
"rewards/rejected": -0.00033663742942735553, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 3.773584905660377e-07, |
|
"logits/chosen": -1.3923314809799194, |
|
"logits/rejected": -1.2636398077011108, |
|
"logps/chosen": -176.70986938476562, |
|
"logps/rejected": -260.1700134277344, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.003964459989219904, |
|
"rewards/margins": 0.0052338032983243465, |
|
"rewards/margins_max": 0.008278938010334969, |
|
"rewards/margins_min": 0.0021886671893298626, |
|
"rewards/margins_std": 0.004306471906602383, |
|
"rewards/rejected": -0.0012693424941971898, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 3.9308176100628933e-07, |
|
"logits/chosen": -1.4243371486663818, |
|
"logits/rejected": -1.1771718263626099, |
|
"logps/chosen": -265.36944580078125, |
|
"logps/rejected": -217.3080291748047, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.004339687060564756, |
|
"rewards/margins": 0.005566070321947336, |
|
"rewards/margins_max": 0.009654941037297249, |
|
"rewards/margins_min": 0.00147719937376678, |
|
"rewards/margins_std": 0.00578253623098135, |
|
"rewards/rejected": -0.0012263832613825798, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 4.088050314465409e-07, |
|
"logits/chosen": -1.2650946378707886, |
|
"logits/rejected": -0.7829256057739258, |
|
"logps/chosen": -283.19415283203125, |
|
"logps/rejected": -258.4779052734375, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.006826590746641159, |
|
"rewards/margins": 0.0073587894439697266, |
|
"rewards/margins_max": 0.012493086978793144, |
|
"rewards/margins_min": 0.002224491210654378, |
|
"rewards/margins_std": 0.007260994054377079, |
|
"rewards/rejected": -0.0005321979406289756, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 4.2452830188679244e-07, |
|
"logits/chosen": -1.379631757736206, |
|
"logits/rejected": -0.836907684803009, |
|
"logps/chosen": -260.239501953125, |
|
"logps/rejected": -241.7003631591797, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.006983810570091009, |
|
"rewards/margins": 0.009598185308277607, |
|
"rewards/margins_max": 0.013700554147362709, |
|
"rewards/margins_min": 0.00549581553786993, |
|
"rewards/margins_std": 0.005801626015454531, |
|
"rewards/rejected": -0.002614373806864023, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 4.40251572327044e-07, |
|
"logits/chosen": -1.538783311843872, |
|
"logits/rejected": -1.2011783123016357, |
|
"logps/chosen": -195.48477172851562, |
|
"logps/rejected": -190.34756469726562, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.004987453110516071, |
|
"rewards/margins": 0.005685538984835148, |
|
"rewards/margins_max": 0.008754138834774494, |
|
"rewards/margins_min": 0.002616937505081296, |
|
"rewards/margins_std": 0.004339656792581081, |
|
"rewards/rejected": -0.0006980849429965019, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 4.559748427672956e-07, |
|
"logits/chosen": -1.453611135482788, |
|
"logits/rejected": -1.022805094718933, |
|
"logps/chosen": -242.57275390625, |
|
"logps/rejected": -207.9861602783203, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.007862430065870285, |
|
"rewards/margins": 0.010466397739946842, |
|
"rewards/margins_max": 0.015998583287000656, |
|
"rewards/margins_min": 0.00493421358987689, |
|
"rewards/margins_std": 0.007823689840734005, |
|
"rewards/rejected": -0.0026039674412459135, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 4.7169811320754717e-07, |
|
"logits/chosen": -1.6172186136245728, |
|
"logits/rejected": -1.1852935552597046, |
|
"logps/chosen": -227.5122528076172, |
|
"logps/rejected": -211.18136596679688, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.01041549351066351, |
|
"rewards/margins": 0.009889104403555393, |
|
"rewards/margins_max": 0.014782111160457134, |
|
"rewards/margins_min": 0.004996097646653652, |
|
"rewards/margins_std": 0.006919757463037968, |
|
"rewards/rejected": 0.0005263882922008634, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 4.874213836477988e-07, |
|
"logits/chosen": -1.3813108205795288, |
|
"logits/rejected": -1.1312620639801025, |
|
"logps/chosen": -199.9052276611328, |
|
"logps/rejected": -216.5789337158203, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.009382456541061401, |
|
"rewards/margins": 0.008607019670307636, |
|
"rewards/margins_max": 0.013246886432170868, |
|
"rewards/margins_min": 0.003967151511460543, |
|
"rewards/margins_std": 0.0065617635846138, |
|
"rewards/rejected": 0.0007754383259452879, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 4.999993950030735e-07, |
|
"logits/chosen": -1.3889760971069336, |
|
"logits/rejected": -1.0410958528518677, |
|
"logps/chosen": -250.23452758789062, |
|
"logps/rejected": -237.1486053466797, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.007147905416786671, |
|
"rewards/margins": 0.008209905587136745, |
|
"rewards/margins_max": 0.013320088386535645, |
|
"rewards/margins_min": 0.003099723719060421, |
|
"rewards/margins_std": 0.007226888090372086, |
|
"rewards/rejected": -0.0010620001703500748, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 4.999782204181026e-07, |
|
"logits/chosen": -1.4792320728302002, |
|
"logits/rejected": -0.9951168298721313, |
|
"logps/chosen": -240.000732421875, |
|
"logps/rejected": -256.5224304199219, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.011202135123312473, |
|
"rewards/margins": 0.014701342210173607, |
|
"rewards/margins_max": 0.022865889593958855, |
|
"rewards/margins_min": 0.006536795757710934, |
|
"rewards/margins_std": 0.011546412482857704, |
|
"rewards/rejected": -0.003499208018183708, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 4.999267989149139e-07, |
|
"logits/chosen": -1.3123283386230469, |
|
"logits/rejected": -0.9737062454223633, |
|
"logps/chosen": -181.56187438964844, |
|
"logps/rejected": -181.15927124023438, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.00947630312293768, |
|
"rewards/margins": 0.010779361240565777, |
|
"rewards/margins_max": 0.014272956177592278, |
|
"rewards/margins_min": 0.007285767234861851, |
|
"rewards/margins_std": 0.004940689541399479, |
|
"rewards/rejected": -0.0013030586997047067, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 4.998451367154173e-07, |
|
"logits/chosen": -1.3401153087615967, |
|
"logits/rejected": -0.9146574139595032, |
|
"logps/chosen": -263.080810546875, |
|
"logps/rejected": -242.10769653320312, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.010717710480093956, |
|
"rewards/margins": 0.013646000996232033, |
|
"rewards/margins_max": 0.021306831389665604, |
|
"rewards/margins_min": 0.005985168274492025, |
|
"rewards/margins_std": 0.010834051296114922, |
|
"rewards/rejected": -0.0029282893519848585, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 4.997332437005931e-07, |
|
"logits/chosen": -1.6230709552764893, |
|
"logits/rejected": -1.197361946105957, |
|
"logps/chosen": -276.94305419921875, |
|
"logps/rejected": -222.4744110107422, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.01071252953261137, |
|
"rewards/margins": 0.014663497917354107, |
|
"rewards/margins_max": 0.023539308458566666, |
|
"rewards/margins_min": 0.005787692964076996, |
|
"rewards/margins_std": 0.012552286498248577, |
|
"rewards/rejected": -0.003950969781726599, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 4.995911334092962e-07, |
|
"logits/chosen": -1.4035460948944092, |
|
"logits/rejected": -1.0208442211151123, |
|
"logps/chosen": -231.21987915039062, |
|
"logps/rejected": -171.5296630859375, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.012600275687873363, |
|
"rewards/margins": 0.01731901802122593, |
|
"rewards/margins_max": 0.023710820823907852, |
|
"rewards/margins_min": 0.010927212424576283, |
|
"rewards/margins_std": 0.0090393777936697, |
|
"rewards/rejected": -0.0047187404707074165, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 4.994188230366183e-07, |
|
"logits/chosen": -1.3101140260696411, |
|
"logits/rejected": -1.0723780393600464, |
|
"logps/chosen": -228.2586669921875, |
|
"logps/rejected": -181.29495239257812, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.01025369018316269, |
|
"rewards/margins": 0.014498481526970863, |
|
"rewards/margins_max": 0.023106779903173447, |
|
"rewards/margins_min": 0.005890182219445705, |
|
"rewards/margins_std": 0.012173972092568874, |
|
"rewards/rejected": -0.004244790878146887, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 4.992163334318065e-07, |
|
"logits/chosen": -1.295276165008545, |
|
"logits/rejected": -0.8806821703910828, |
|
"logps/chosen": -266.9888916015625, |
|
"logps/rejected": -213.50936889648438, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.012072061188519001, |
|
"rewards/margins": 0.016327695921063423, |
|
"rewards/margins_max": 0.02478429302573204, |
|
"rewards/margins_min": 0.007871100679039955, |
|
"rewards/margins_std": 0.011959430761635303, |
|
"rewards/rejected": -0.004255634266883135, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 4.989836890957414e-07, |
|
"logits/chosen": -1.3160616159439087, |
|
"logits/rejected": -0.985907256603241, |
|
"logps/chosen": -209.94921875, |
|
"logps/rejected": -199.602294921875, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.011544553562998772, |
|
"rewards/margins": 0.013081875629723072, |
|
"rewards/margins_max": 0.020108871161937714, |
|
"rewards/margins_min": 0.00605488196015358, |
|
"rewards/margins_std": 0.009937671013176441, |
|
"rewards/rejected": -0.0015373228816315532, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 4.987209181779722e-07, |
|
"logits/chosen": -1.4713616371154785, |
|
"logits/rejected": -1.2096041440963745, |
|
"logps/chosen": -192.724853515625, |
|
"logps/rejected": -176.57815551757812, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.012110630050301552, |
|
"rewards/margins": 0.013713735155761242, |
|
"rewards/margins_max": 0.019766617566347122, |
|
"rewards/margins_min": 0.007660853676497936, |
|
"rewards/margins_std": 0.008560067042708397, |
|
"rewards/rejected": -0.0016031056875362992, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 4.984280524733107e-07, |
|
"logits/chosen": -1.367755651473999, |
|
"logits/rejected": -0.9895979762077332, |
|
"logps/chosen": -256.28277587890625, |
|
"logps/rejected": -244.4967041015625, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.02168741635978222, |
|
"rewards/margins": 0.0215130727738142, |
|
"rewards/margins_max": 0.03101455047726631, |
|
"rewards/margins_min": 0.012011596001684666, |
|
"rewards/margins_std": 0.013437116518616676, |
|
"rewards/rejected": 0.00017434502660762519, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 4.98105127417984e-07, |
|
"logits/chosen": -1.3296594619750977, |
|
"logits/rejected": -1.030011773109436, |
|
"logps/chosen": -259.4856872558594, |
|
"logps/rejected": -251.21728515625, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.019032226875424385, |
|
"rewards/margins": 0.019077284261584282, |
|
"rewards/margins_max": 0.032611675560474396, |
|
"rewards/margins_min": 0.005542895756661892, |
|
"rewards/margins_std": 0.019140515476465225, |
|
"rewards/rejected": -4.505945253185928e-05, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 4.97752182085347e-07, |
|
"logits/chosen": -1.513671636581421, |
|
"logits/rejected": -0.9878429174423218, |
|
"logps/chosen": -207.31887817382812, |
|
"logps/rejected": -203.1181640625, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.02094658836722374, |
|
"rewards/margins": 0.020975876599550247, |
|
"rewards/margins_max": 0.0333896279335022, |
|
"rewards/margins_min": 0.008562122471630573, |
|
"rewards/margins_std": 0.01755569875240326, |
|
"rewards/rejected": -2.9285531127243303e-05, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 4.973692591811548e-07, |
|
"logits/chosen": -1.3032740354537964, |
|
"logits/rejected": -1.1484416723251343, |
|
"logps/chosen": -212.3832550048828, |
|
"logps/rejected": -230.93881225585938, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.01571028307080269, |
|
"rewards/margins": 0.018571963533759117, |
|
"rewards/margins_max": 0.028482910245656967, |
|
"rewards/margins_min": 0.008661014959216118, |
|
"rewards/margins_std": 0.014016198925673962, |
|
"rewards/rejected": -0.002861680928617716, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 4.96956405038395e-07, |
|
"logits/chosen": -1.2598702907562256, |
|
"logits/rejected": -0.9527764320373535, |
|
"logps/chosen": -172.1599578857422, |
|
"logps/rejected": -211.24148559570312, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.015954652801156044, |
|
"rewards/margins": 0.020714178681373596, |
|
"rewards/margins_max": 0.03062686324119568, |
|
"rewards/margins_min": 0.010801494121551514, |
|
"rewards/margins_std": 0.014018652029335499, |
|
"rewards/rejected": -0.004759527277201414, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 4.965136696116812e-07, |
|
"logits/chosen": -1.3497663736343384, |
|
"logits/rejected": -1.029840111732483, |
|
"logps/chosen": -213.888916015625, |
|
"logps/rejected": -260.24090576171875, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.020394446328282356, |
|
"rewards/margins": 0.026438722386956215, |
|
"rewards/margins_max": 0.037151582539081573, |
|
"rewards/margins_min": 0.015725860372185707, |
|
"rewards/margins_std": 0.015150276012718678, |
|
"rewards/rejected": -0.006044276989996433, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 4.960411064712094e-07, |
|
"logits/chosen": -1.3540217876434326, |
|
"logits/rejected": -1.0137008428573608, |
|
"logps/chosen": -183.4146270751953, |
|
"logps/rejected": -218.0957489013672, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0185568667948246, |
|
"rewards/margins": 0.021527493372559547, |
|
"rewards/margins_max": 0.031103383749723434, |
|
"rewards/margins_min": 0.011951602064073086, |
|
"rewards/margins_std": 0.013542355969548225, |
|
"rewards/rejected": -0.002970626810565591, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.955387727962759e-07, |
|
"logits/chosen": -1.469268798828125, |
|
"logits/rejected": -1.1933975219726562, |
|
"logps/chosen": -175.39476013183594, |
|
"logps/rejected": -170.35171508789062, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.016227375715970993, |
|
"rewards/margins": 0.018915237858891487, |
|
"rewards/margins_max": 0.028273263946175575, |
|
"rewards/margins_min": 0.009557214565575123, |
|
"rewards/margins_std": 0.013234244659543037, |
|
"rewards/rejected": -0.0026878633070737123, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 4.95006729368358e-07, |
|
"logits/chosen": -1.591016411781311, |
|
"logits/rejected": -1.1849809885025024, |
|
"logps/chosen": -215.30050659179688, |
|
"logps/rejected": -204.9720458984375, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.017296748235821724, |
|
"rewards/margins": 0.025146162137389183, |
|
"rewards/margins_max": 0.03591996058821678, |
|
"rewards/margins_min": 0.014372363686561584, |
|
"rewards/margins_std": 0.015236446633934975, |
|
"rewards/rejected": -0.007849409244954586, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 4.944450405637601e-07, |
|
"logits/chosen": -1.3407318592071533, |
|
"logits/rejected": -1.0564701557159424, |
|
"logps/chosen": -208.2605438232422, |
|
"logps/rejected": -194.71420288085938, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.014876808039844036, |
|
"rewards/margins": 0.02353464625775814, |
|
"rewards/margins_max": 0.03144057095050812, |
|
"rewards/margins_min": 0.015628723427653313, |
|
"rewards/margins_std": 0.011180664412677288, |
|
"rewards/rejected": -0.008657841011881828, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 4.938537743458248e-07, |
|
"logits/chosen": -1.3480737209320068, |
|
"logits/rejected": -1.028096318244934, |
|
"logps/chosen": -179.07174682617188, |
|
"logps/rejected": -181.2510223388672, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.019465144723653793, |
|
"rewards/margins": 0.02149110659956932, |
|
"rewards/margins_max": 0.032975539565086365, |
|
"rewards/margins_min": 0.010006672702729702, |
|
"rewards/margins_std": 0.016241444274783134, |
|
"rewards/rejected": -0.0020259625744074583, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 4.932330022567081e-07, |
|
"logits/chosen": -1.3110549449920654, |
|
"logits/rejected": -1.0896965265274048, |
|
"logps/chosen": -212.4080810546875, |
|
"logps/rejected": -204.01026916503906, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.018044626340270042, |
|
"rewards/margins": 0.027044925838708878, |
|
"rewards/margins_max": 0.03969361633062363, |
|
"rewards/margins_min": 0.01439622975885868, |
|
"rewards/margins_std": 0.017887955531477928, |
|
"rewards/rejected": -0.009000294841825962, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 4.925827994087244e-07, |
|
"logits/chosen": -1.467350721359253, |
|
"logits/rejected": -0.9765011072158813, |
|
"logps/chosen": -196.60804748535156, |
|
"logps/rejected": -212.77114868164062, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.022185953333973885, |
|
"rewards/margins": 0.024724114686250687, |
|
"rewards/margins_max": 0.03783790022134781, |
|
"rewards/margins_min": 0.011610329151153564, |
|
"rewards/margins_std": 0.018545694649219513, |
|
"rewards/rejected": -0.00253815995529294, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 4.91903244475257e-07, |
|
"logits/chosen": -1.4453445672988892, |
|
"logits/rejected": -1.2255735397338867, |
|
"logps/chosen": -233.5497589111328, |
|
"logps/rejected": -203.3679962158203, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.022691726684570312, |
|
"rewards/margins": 0.027249213308095932, |
|
"rewards/margins_max": 0.042755015194416046, |
|
"rewards/margins_min": 0.011743416078388691, |
|
"rewards/margins_std": 0.021928513422608376, |
|
"rewards/rejected": -0.004557489417493343, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 4.91194419681239e-07, |
|
"logits/chosen": -1.4020469188690186, |
|
"logits/rejected": -1.0889606475830078, |
|
"logps/chosen": -201.20901489257812, |
|
"logps/rejected": -204.1788330078125, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.024152381345629692, |
|
"rewards/margins": 0.02786511741578579, |
|
"rewards/margins_max": 0.042585860937833786, |
|
"rewards/margins_min": 0.013144371099770069, |
|
"rewards/margins_std": 0.020818280056118965, |
|
"rewards/rejected": -0.00371273560449481, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 4.904564107932048e-07, |
|
"logits/chosen": -1.2641432285308838, |
|
"logits/rejected": -0.897659182548523, |
|
"logps/chosen": -271.8118896484375, |
|
"logps/rejected": -239.61300659179688, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.017158251255750656, |
|
"rewards/margins": 0.024462290108203888, |
|
"rewards/margins_max": 0.03610853850841522, |
|
"rewards/margins_min": 0.012816043570637703, |
|
"rewards/margins_std": 0.016470283269882202, |
|
"rewards/rejected": -0.007304038852453232, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 4.896893071089115e-07, |
|
"logits/chosen": -1.3425147533416748, |
|
"logits/rejected": -1.0659515857696533, |
|
"logps/chosen": -230.05111694335938, |
|
"logps/rejected": -245.98550415039062, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.02490960620343685, |
|
"rewards/margins": 0.0353575199842453, |
|
"rewards/margins_max": 0.05103808641433716, |
|
"rewards/margins_min": 0.019676949828863144, |
|
"rewards/margins_std": 0.022175675258040428, |
|
"rewards/rejected": -0.010447912849485874, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 4.888932014465352e-07, |
|
"logits/chosen": -1.313063383102417, |
|
"logits/rejected": -0.9944307208061218, |
|
"logps/chosen": -208.10879516601562, |
|
"logps/rejected": -223.5287322998047, |
|
"loss": 0.679, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.022358160465955734, |
|
"rewards/margins": 0.03080761432647705, |
|
"rewards/margins_max": 0.04008474573493004, |
|
"rewards/margins_min": 0.021530481055378914, |
|
"rewards/margins_std": 0.013119848445057869, |
|
"rewards/rejected": -0.008449452929198742, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.5, |
|
"learning_rate": 4.88068190133439e-07, |
|
"logits/chosen": -1.4195083379745483, |
|
"logits/rejected": -1.1984379291534424, |
|
"logps/chosen": -293.54132080078125, |
|
"logps/rejected": -259.38360595703125, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023516178131103516, |
|
"rewards/margins": 0.03416413068771362, |
|
"rewards/margins_max": 0.045261941850185394, |
|
"rewards/margins_min": 0.023066317662596703, |
|
"rewards/margins_std": 0.01569467782974243, |
|
"rewards/rejected": -0.010647954419255257, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 4.872143729945184e-07, |
|
"logits/chosen": -1.2229716777801514, |
|
"logits/rejected": -0.8150213360786438, |
|
"logps/chosen": -218.43276977539062, |
|
"logps/rejected": -191.75827026367188, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.022476380690932274, |
|
"rewards/margins": 0.03276657313108444, |
|
"rewards/margins_max": 0.046399351209402084, |
|
"rewards/margins_min": 0.019133802503347397, |
|
"rewards/margins_std": 0.019279656931757927, |
|
"rewards/rejected": -0.010290195234119892, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 4.863318533401223e-07, |
|
"logits/chosen": -1.3115172386169434, |
|
"logits/rejected": -0.8752225041389465, |
|
"logps/chosen": -246.44140625, |
|
"logps/rejected": -270.7298278808594, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.02260987088084221, |
|
"rewards/margins": 0.034483883529901505, |
|
"rewards/margins_max": 0.053231727331876755, |
|
"rewards/margins_min": 0.015736039727926254, |
|
"rewards/margins_std": 0.02651345357298851, |
|
"rewards/rejected": -0.01187401358038187, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 4.854207379535528e-07, |
|
"logits/chosen": -1.4319788217544556, |
|
"logits/rejected": -1.0323983430862427, |
|
"logps/chosen": -254.2920684814453, |
|
"logps/rejected": -237.34378051757812, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.017374712973833084, |
|
"rewards/margins": 0.029332011938095093, |
|
"rewards/margins_max": 0.04456937313079834, |
|
"rewards/margins_min": 0.014094656333327293, |
|
"rewards/margins_std": 0.021548878401517868, |
|
"rewards/rejected": -0.011957301758229733, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 4.844811370781446e-07, |
|
"logits/chosen": -1.4312872886657715, |
|
"logits/rejected": -0.9997726678848267, |
|
"logps/chosen": -244.96224975585938, |
|
"logps/rejected": -225.2017059326172, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.030679643154144287, |
|
"rewards/margins": 0.03587724640965462, |
|
"rewards/margins_max": 0.05194888263940811, |
|
"rewards/margins_min": 0.01980561390519142, |
|
"rewards/margins_std": 0.022728722542524338, |
|
"rewards/rejected": -0.00519760325551033, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 4.835131644039251e-07, |
|
"logits/chosen": -1.4758861064910889, |
|
"logits/rejected": -0.9546338319778442, |
|
"logps/chosen": -339.2093505859375, |
|
"logps/rejected": -224.3199920654297, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02631019614636898, |
|
"rewards/margins": 0.04230981320142746, |
|
"rewards/margins_max": 0.05546834319829941, |
|
"rewards/margins_min": 0.02915129065513611, |
|
"rewards/margins_std": 0.01860896497964859, |
|
"rewards/rejected": -0.015999620780348778, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 4.825169370538594e-07, |
|
"logits/chosen": -1.2813438177108765, |
|
"logits/rejected": -1.069059133529663, |
|
"logps/chosen": -231.5976104736328, |
|
"logps/rejected": -245.3614501953125, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.02429072931408882, |
|
"rewards/margins": 0.027610447257757187, |
|
"rewards/margins_max": 0.0455966591835022, |
|
"rewards/margins_min": 0.009624237194657326, |
|
"rewards/margins_std": 0.025436347350478172, |
|
"rewards/rejected": -0.003319723065942526, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 4.814925755696778e-07, |
|
"logits/chosen": -1.4551244974136353, |
|
"logits/rejected": -0.9832841157913208, |
|
"logps/chosen": -288.0592346191406, |
|
"logps/rejected": -252.08364868164062, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.028888309374451637, |
|
"rewards/margins": 0.03665446117520332, |
|
"rewards/margins_max": 0.05256615951657295, |
|
"rewards/margins_min": 0.020742762833833694, |
|
"rewards/margins_std": 0.022502535954117775, |
|
"rewards/rejected": -0.007766152266412973, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 4.804402038972899e-07, |
|
"logits/chosen": -1.4220234155654907, |
|
"logits/rejected": -1.02151358127594, |
|
"logps/chosen": -271.8201599121094, |
|
"logps/rejected": -273.0591735839844, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.028314124792814255, |
|
"rewards/margins": 0.03461919724941254, |
|
"rewards/margins_max": 0.0456906296312809, |
|
"rewards/margins_min": 0.02354777231812477, |
|
"rewards/margins_std": 0.015657365322113037, |
|
"rewards/rejected": -0.006305074784904718, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 4.79359949371789e-07, |
|
"logits/chosen": -1.3343526124954224, |
|
"logits/rejected": -0.9362949132919312, |
|
"logps/chosen": -257.0128173828125, |
|
"logps/rejected": -225.06753540039062, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.025834929198026657, |
|
"rewards/margins": 0.035724394023418427, |
|
"rewards/margins_max": 0.04703225940465927, |
|
"rewards/margins_min": 0.024416524916887283, |
|
"rewards/margins_std": 0.015991736203432083, |
|
"rewards/rejected": -0.009889459237456322, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 4.782519427020432e-07, |
|
"logits/chosen": -1.3143739700317383, |
|
"logits/rejected": -0.9841324687004089, |
|
"logps/chosen": -204.9932098388672, |
|
"logps/rejected": -218.54141235351562, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.016944795846939087, |
|
"rewards/margins": 0.0319121815264225, |
|
"rewards/margins_max": 0.04973548650741577, |
|
"rewards/margins_min": 0.014088879339396954, |
|
"rewards/margins_std": 0.025205958634614944, |
|
"rewards/rejected": -0.014967384748160839, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 4.771163179548808e-07, |
|
"logits/chosen": -1.3899494409561157, |
|
"logits/rejected": -0.9665300250053406, |
|
"logps/chosen": -333.5497131347656, |
|
"logps/rejected": -231.9584503173828, |
|
"loss": 0.6699, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.027130257338285446, |
|
"rewards/margins": 0.04676414281129837, |
|
"rewards/margins_max": 0.06533849239349365, |
|
"rewards/margins_min": 0.028189798817038536, |
|
"rewards/margins_std": 0.026268085464835167, |
|
"rewards/rejected": -0.019633881747722626, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 4.75953212538868e-07, |
|
"logits/chosen": -1.2207629680633545, |
|
"logits/rejected": -0.8575620651245117, |
|
"logps/chosen": -263.4650573730469, |
|
"logps/rejected": -233.98886108398438, |
|
"loss": 0.6717, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03418269008398056, |
|
"rewards/margins": 0.05005268007516861, |
|
"rewards/margins_max": 0.0767994374036789, |
|
"rewards/margins_min": 0.02330590970814228, |
|
"rewards/margins_std": 0.03782564401626587, |
|
"rewards/rejected": -0.0158699844032526, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 4.7476276718768284e-07, |
|
"logits/chosen": -1.434762716293335, |
|
"logits/rejected": -1.0808919668197632, |
|
"logps/chosen": -206.06533813476562, |
|
"logps/rejected": -218.6961669921875, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.026135995984077454, |
|
"rewards/margins": 0.03937443345785141, |
|
"rewards/margins_max": 0.05718846991658211, |
|
"rewards/margins_min": 0.021560396999120712, |
|
"rewards/margins_std": 0.025192851200699806, |
|
"rewards/rejected": -0.01323844026774168, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 4.7354512594308654e-07, |
|
"logits/chosen": -1.3616220951080322, |
|
"logits/rejected": -1.1791460514068604, |
|
"logps/chosen": -193.25332641601562, |
|
"logps/rejected": -197.8474578857422, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.024115614593029022, |
|
"rewards/margins": 0.03462132811546326, |
|
"rewards/margins_max": 0.052414439618587494, |
|
"rewards/margins_min": 0.016828209161758423, |
|
"rewards/margins_std": 0.02516326680779457, |
|
"rewards/rejected": -0.010505708865821362, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.7230043613749527e-07, |
|
"logits/chosen": -1.3196706771850586, |
|
"logits/rejected": -1.0803533792495728, |
|
"logps/chosen": -229.3977813720703, |
|
"logps/rejected": -198.8769989013672, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.023436803370714188, |
|
"rewards/margins": 0.03306025639176369, |
|
"rewards/margins_max": 0.05252969264984131, |
|
"rewards/margins_min": 0.013590824790298939, |
|
"rewards/margins_std": 0.027533939108252525, |
|
"rewards/rejected": -0.009623454883694649, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 4.710288483761524e-07, |
|
"logits/chosen": -1.1608425378799438, |
|
"logits/rejected": -0.8409261703491211, |
|
"logps/chosen": -238.56130981445312, |
|
"logps/rejected": -212.3688507080078, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.01990620419383049, |
|
"rewards/margins": 0.038102246820926666, |
|
"rewards/margins_max": 0.053315240889787674, |
|
"rewards/margins_min": 0.022889258340001106, |
|
"rewards/margins_std": 0.021514419466257095, |
|
"rewards/rejected": -0.018196044489741325, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 4.697305165189062e-07, |
|
"logits/chosen": -1.4269897937774658, |
|
"logits/rejected": -1.0499980449676514, |
|
"logps/chosen": -230.6156768798828, |
|
"logps/rejected": -229.01708984375, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.028978174552321434, |
|
"rewards/margins": 0.03824831172823906, |
|
"rewards/margins_max": 0.06103180721402168, |
|
"rewards/margins_min": 0.015464827418327332, |
|
"rewards/margins_std": 0.03222071751952171, |
|
"rewards/rejected": -0.009270140901207924, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 4.6840559766159235e-07, |
|
"logits/chosen": -1.3930243253707886, |
|
"logits/rejected": -0.9540492296218872, |
|
"logps/chosen": -225.96084594726562, |
|
"logps/rejected": -237.0554962158203, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03488198295235634, |
|
"rewards/margins": 0.05557037144899368, |
|
"rewards/margins_max": 0.07404030859470367, |
|
"rewards/margins_min": 0.0371004194021225, |
|
"rewards/margins_std": 0.026120448485016823, |
|
"rewards/rejected": -0.02068837732076645, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 4.6705425211702656e-07, |
|
"logits/chosen": -1.4000756740570068, |
|
"logits/rejected": -1.1083465814590454, |
|
"logps/chosen": -172.87281799316406, |
|
"logps/rejected": -189.81704711914062, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.023823823779821396, |
|
"rewards/margins": 0.0378737710416317, |
|
"rewards/margins_max": 0.05512396618723869, |
|
"rewards/margins_min": 0.020623570308089256, |
|
"rewards/margins_std": 0.02439546398818493, |
|
"rewards/rejected": -0.014049944467842579, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.656766433956062e-07, |
|
"logits/chosen": -1.3979090452194214, |
|
"logits/rejected": -0.8946587443351746, |
|
"logps/chosen": -248.97512817382812, |
|
"logps/rejected": -218.88919067382812, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03325175121426582, |
|
"rewards/margins": 0.05383073538541794, |
|
"rewards/margins_max": 0.07829690724611282, |
|
"rewards/margins_min": 0.02936457097530365, |
|
"rewards/margins_std": 0.034600384533405304, |
|
"rewards/rejected": -0.020578987896442413, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 4.6427293818552613e-07, |
|
"logits/chosen": -1.4188311100006104, |
|
"logits/rejected": -0.9876410365104675, |
|
"logps/chosen": -234.7069854736328, |
|
"logps/rejected": -182.8696746826172, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03687068074941635, |
|
"rewards/margins": 0.04073936119675636, |
|
"rewards/margins_max": 0.05916820093989372, |
|
"rewards/margins_min": 0.022310517728328705, |
|
"rewards/margins_std": 0.026062315329909325, |
|
"rewards/rejected": -0.0038686811458319426, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 4.6284330633260994e-07, |
|
"logits/chosen": -1.3178324699401855, |
|
"logits/rejected": -0.9743862152099609, |
|
"logps/chosen": -202.13705444335938, |
|
"logps/rejected": -199.08094787597656, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0206548273563385, |
|
"rewards/margins": 0.038690946996212006, |
|
"rewards/margins_max": 0.057139646261930466, |
|
"rewards/margins_min": 0.020242247730493546, |
|
"rewards/margins_std": 0.026090402156114578, |
|
"rewards/rejected": -0.018036121502518654, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 4.6138792081975844e-07, |
|
"logits/chosen": -1.4049233198165894, |
|
"logits/rejected": -1.0411832332611084, |
|
"logps/chosen": -223.6266632080078, |
|
"logps/rejected": -188.9544219970703, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03218904137611389, |
|
"rewards/margins": 0.04215382784605026, |
|
"rewards/margins_max": 0.06759864091873169, |
|
"rewards/margins_min": 0.016709014773368835, |
|
"rewards/margins_std": 0.03598439693450928, |
|
"rewards/rejected": -0.009964784607291222, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 4.599069577460194e-07, |
|
"logits/chosen": -1.4191118478775024, |
|
"logits/rejected": -1.1629408597946167, |
|
"logps/chosen": -280.8072814941406, |
|
"logps/rejected": -243.64645385742188, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04105687886476517, |
|
"rewards/margins": 0.05216727405786514, |
|
"rewards/margins_max": 0.07590137422084808, |
|
"rewards/margins_min": 0.028433170169591904, |
|
"rewards/margins_std": 0.033565085381269455, |
|
"rewards/rejected": -0.011110392399132252, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 4.5840059630527985e-07, |
|
"logits/chosen": -1.505789875984192, |
|
"logits/rejected": -1.090831995010376, |
|
"logps/chosen": -203.38735961914062, |
|
"logps/rejected": -218.90292358398438, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.035862646996974945, |
|
"rewards/margins": 0.048062682151794434, |
|
"rewards/margins_max": 0.06934330612421036, |
|
"rewards/margins_min": 0.02678206004202366, |
|
"rewards/margins_std": 0.030095338821411133, |
|
"rewards/rejected": -0.01220003329217434, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 4.5686901876458384e-07, |
|
"logits/chosen": -1.4151548147201538, |
|
"logits/rejected": -1.0735548734664917, |
|
"logps/chosen": -211.01199340820312, |
|
"logps/rejected": -224.63619995117188, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02967039868235588, |
|
"rewards/margins": 0.04329541698098183, |
|
"rewards/margins_max": 0.0652666911482811, |
|
"rewards/margins_min": 0.021324139088392258, |
|
"rewards/margins_std": 0.03107207641005516, |
|
"rewards/rejected": -0.013625016435980797, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 4.553124104420784e-07, |
|
"logits/chosen": -1.3255832195281982, |
|
"logits/rejected": -1.1080420017242432, |
|
"logps/chosen": -205.3770751953125, |
|
"logps/rejected": -209.04660034179688, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.022777115926146507, |
|
"rewards/margins": 0.0444360189139843, |
|
"rewards/margins_max": 0.06135256215929985, |
|
"rewards/margins_min": 0.02751948870718479, |
|
"rewards/margins_std": 0.023923594504594803, |
|
"rewards/rejected": -0.02165890485048294, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 4.537309596845905e-07, |
|
"logits/chosen": -1.4212206602096558, |
|
"logits/rejected": -1.1468111276626587, |
|
"logps/chosen": -203.2875213623047, |
|
"logps/rejected": -183.04867553710938, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03265067934989929, |
|
"rewards/margins": 0.04764062911272049, |
|
"rewards/margins_max": 0.06543318927288055, |
|
"rewards/margins_min": 0.029848068952560425, |
|
"rewards/margins_std": 0.025162484496831894, |
|
"rewards/rejected": -0.014989949762821198, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 4.521248578448373e-07, |
|
"logits/chosen": -1.295290231704712, |
|
"logits/rejected": -1.2244700193405151, |
|
"logps/chosen": -167.2049560546875, |
|
"logps/rejected": -235.2522735595703, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.021955791860818863, |
|
"rewards/margins": 0.0307827889919281, |
|
"rewards/margins_max": 0.04658069089055061, |
|
"rewards/margins_min": 0.01498488150537014, |
|
"rewards/margins_std": 0.022341612726449966, |
|
"rewards/rejected": -0.008826995268464088, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 4.504942992582732e-07, |
|
"logits/chosen": -1.2876170873641968, |
|
"logits/rejected": -1.070996642112732, |
|
"logps/chosen": -201.41519165039062, |
|
"logps/rejected": -215.9574737548828, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.033751118928194046, |
|
"rewards/margins": 0.03825461119413376, |
|
"rewards/margins_max": 0.05966836214065552, |
|
"rewards/margins_min": 0.016840863972902298, |
|
"rewards/margins_std": 0.030283614993095398, |
|
"rewards/rejected": -0.004503494594246149, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 4.4883948121957483e-07, |
|
"logits/chosen": -1.3818947076797485, |
|
"logits/rejected": -1.1178925037384033, |
|
"logps/chosen": -170.48712158203125, |
|
"logps/rejected": -221.67098999023438, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.028661269694566727, |
|
"rewards/margins": 0.04194828122854233, |
|
"rewards/margins_max": 0.06600390374660492, |
|
"rewards/margins_min": 0.017892662435770035, |
|
"rewards/margins_std": 0.03401978313922882, |
|
"rewards/rejected": -0.013287017121911049, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 4.471606039587695e-07, |
|
"logits/chosen": -1.4353498220443726, |
|
"logits/rejected": -1.2498797178268433, |
|
"logps/chosen": -250.303466796875, |
|
"logps/rejected": -252.1241912841797, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.030318697914481163, |
|
"rewards/margins": 0.03714519739151001, |
|
"rewards/margins_max": 0.05723171681165695, |
|
"rewards/margins_min": 0.01705867424607277, |
|
"rewards/margins_std": 0.02840663120150566, |
|
"rewards/rejected": -0.006826499011367559, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 4.4545787061700746e-07, |
|
"logits/chosen": -1.4596531391143799, |
|
"logits/rejected": -0.9841306805610657, |
|
"logps/chosen": -191.699462890625, |
|
"logps/rejected": -231.6093292236328, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.037762559950351715, |
|
"rewards/margins": 0.05371398851275444, |
|
"rewards/margins_max": 0.07777608931064606, |
|
"rewards/margins_min": 0.029651891440153122, |
|
"rewards/margins_std": 0.03402894735336304, |
|
"rewards/rejected": -0.015951428562402725, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 4.4373148722198183e-07, |
|
"logits/chosen": -1.3031915426254272, |
|
"logits/rejected": -0.9831310510635376, |
|
"logps/chosen": -203.33865356445312, |
|
"logps/rejected": -233.1627197265625, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03300677612423897, |
|
"rewards/margins": 0.06004180386662483, |
|
"rewards/margins_max": 0.09207084774971008, |
|
"rewards/margins_min": 0.028012752532958984, |
|
"rewards/margins_std": 0.04529590904712677, |
|
"rewards/rejected": -0.027035022154450417, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 4.4198166266300025e-07, |
|
"logits/chosen": -1.4863415956497192, |
|
"logits/rejected": -1.0757322311401367, |
|
"logps/chosen": -224.2287139892578, |
|
"logps/rejected": -239.1637725830078, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.034104812890291214, |
|
"rewards/margins": 0.05569925159215927, |
|
"rewards/margins_max": 0.08411959558725357, |
|
"rewards/margins_min": 0.027278924360871315, |
|
"rewards/margins_std": 0.04019241780042648, |
|
"rewards/rejected": -0.021594444289803505, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 4.402086086657092e-07, |
|
"logits/chosen": -1.5037004947662354, |
|
"logits/rejected": -0.9914538264274597, |
|
"logps/chosen": -196.54397583007812, |
|
"logps/rejected": -211.8581085205078, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.030650585889816284, |
|
"rewards/margins": 0.04529775679111481, |
|
"rewards/margins_max": 0.06939564645290375, |
|
"rewards/margins_min": 0.021199876442551613, |
|
"rewards/margins_std": 0.034079547971487045, |
|
"rewards/rejected": -0.014647173695266247, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 4.3841253976647584e-07, |
|
"logits/chosen": -1.4153146743774414, |
|
"logits/rejected": -1.0589603185653687, |
|
"logps/chosen": -195.8428192138672, |
|
"logps/rejected": -189.7724609375, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03924962133169174, |
|
"rewards/margins": 0.05352933332324028, |
|
"rewards/margins_max": 0.08327177166938782, |
|
"rewards/margins_min": 0.023786883801221848, |
|
"rewards/margins_std": 0.042062170803546906, |
|
"rewards/rejected": -0.014279710128903389, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 4.3659367328642917e-07, |
|
"logits/chosen": -1.1924479007720947, |
|
"logits/rejected": -1.0621583461761475, |
|
"logps/chosen": -212.1740264892578, |
|
"logps/rejected": -255.7356414794922, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.026019830256700516, |
|
"rewards/margins": 0.039071694016456604, |
|
"rewards/margins_max": 0.05669945478439331, |
|
"rewards/margins_min": 0.021443922072649002, |
|
"rewards/margins_std": 0.024929430335760117, |
|
"rewards/rejected": -0.013051861897110939, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.5, |
|
"learning_rate": 4.3475222930516473e-07, |
|
"logits/chosen": -1.3828264474868774, |
|
"logits/rejected": -1.12723708152771, |
|
"logps/chosen": -209.1006622314453, |
|
"logps/rejected": -211.49014282226562, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.024994900450110435, |
|
"rewards/margins": 0.04330515116453171, |
|
"rewards/margins_max": 0.0647624060511589, |
|
"rewards/margins_min": 0.021847892552614212, |
|
"rewards/margins_std": 0.03034515120089054, |
|
"rewards/rejected": -0.018310246989130974, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 4.3288843063411573e-07, |
|
"logits/chosen": -1.588935136795044, |
|
"logits/rejected": -1.1631680727005005, |
|
"logps/chosen": -212.937255859375, |
|
"logps/rejected": -199.36758422851562, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03406776860356331, |
|
"rewards/margins": 0.04911542311310768, |
|
"rewards/margins_max": 0.07006208598613739, |
|
"rewards/margins_min": 0.02816876210272312, |
|
"rewards/margins_std": 0.029623055830597878, |
|
"rewards/rejected": -0.015047654509544373, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 4.310025027895925e-07, |
|
"logits/chosen": -1.4283082485198975, |
|
"logits/rejected": -1.1020632982254028, |
|
"logps/chosen": -215.6881103515625, |
|
"logps/rejected": -228.725341796875, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.035303063690662384, |
|
"rewards/margins": 0.047801949083805084, |
|
"rewards/margins_max": 0.06932314485311508, |
|
"rewards/margins_min": 0.026280760765075684, |
|
"rewards/margins_std": 0.03043556772172451, |
|
"rewards/rejected": -0.01249888725578785, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 4.290946739654962e-07, |
|
"logits/chosen": -1.3023067712783813, |
|
"logits/rejected": -0.9218745231628418, |
|
"logps/chosen": -245.7296905517578, |
|
"logps/rejected": -226.1380615234375, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.031115401536226273, |
|
"rewards/margins": 0.05050064995884895, |
|
"rewards/margins_max": 0.06927161663770676, |
|
"rewards/margins_min": 0.03172967582941055, |
|
"rewards/margins_std": 0.026546159759163857, |
|
"rewards/rejected": -0.019385244697332382, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 4.2716517500570704e-07, |
|
"logits/chosen": -1.3911397457122803, |
|
"logits/rejected": -1.181490182876587, |
|
"logps/chosen": -186.88909912109375, |
|
"logps/rejected": -218.47900390625, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.024538397789001465, |
|
"rewards/margins": 0.040439218282699585, |
|
"rewards/margins_max": 0.060990117490291595, |
|
"rewards/margins_min": 0.019888322800397873, |
|
"rewards/margins_std": 0.029063355177640915, |
|
"rewards/rejected": -0.01590082235634327, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 4.252142393761533e-07, |
|
"logits/chosen": -1.4555580615997314, |
|
"logits/rejected": -1.0047805309295654, |
|
"logps/chosen": -251.005615234375, |
|
"logps/rejected": -284.2795715332031, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03476887568831444, |
|
"rewards/margins": 0.054674047976732254, |
|
"rewards/margins_max": 0.07890333235263824, |
|
"rewards/margins_min": 0.030444765463471413, |
|
"rewards/margins_std": 0.03426538407802582, |
|
"rewards/rejected": -0.019905168563127518, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 4.232421031365617e-07, |
|
"logits/chosen": -1.3305257558822632, |
|
"logits/rejected": -1.1427993774414062, |
|
"logps/chosen": -180.7315216064453, |
|
"logps/rejected": -214.3630828857422, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02955777570605278, |
|
"rewards/margins": 0.051715098321437836, |
|
"rewards/margins_max": 0.07381218671798706, |
|
"rewards/margins_min": 0.029618006199598312, |
|
"rewards/margins_std": 0.031250011175870895, |
|
"rewards/rejected": -0.022157320752739906, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 4.212490049118951e-07, |
|
"logits/chosen": -1.4470938444137573, |
|
"logits/rejected": -1.143046498298645, |
|
"logps/chosen": -198.9834442138672, |
|
"logps/rejected": -239.5135955810547, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.028033524751663208, |
|
"rewards/margins": 0.04688093811273575, |
|
"rewards/margins_max": 0.06461174786090851, |
|
"rewards/margins_min": 0.02915012836456299, |
|
"rewards/margins_std": 0.025075148791074753, |
|
"rewards/rejected": -0.018847409635782242, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 4.1923518586347914e-07, |
|
"logits/chosen": -1.4638912677764893, |
|
"logits/rejected": -1.0022966861724854, |
|
"logps/chosen": -209.8042449951172, |
|
"logps/rejected": -192.01329040527344, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03496958687901497, |
|
"rewards/margins": 0.05312635377049446, |
|
"rewards/margins_max": 0.07696934044361115, |
|
"rewards/margins_min": 0.029283368960022926, |
|
"rewards/margins_std": 0.03371907025575638, |
|
"rewards/rejected": -0.018156763166189194, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 4.172008896598221e-07, |
|
"logits/chosen": -1.3048521280288696, |
|
"logits/rejected": -1.0749359130859375, |
|
"logps/chosen": -201.3562469482422, |
|
"logps/rejected": -187.42080688476562, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03374785929918289, |
|
"rewards/margins": 0.055463533848524094, |
|
"rewards/margins_max": 0.07117541134357452, |
|
"rewards/margins_min": 0.03975165635347366, |
|
"rewards/margins_std": 0.022219957783818245, |
|
"rewards/rejected": -0.02171567641198635, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 4.151463624471313e-07, |
|
"logits/chosen": -1.32763671875, |
|
"logits/rejected": -0.8290489315986633, |
|
"logps/chosen": -311.2829895019531, |
|
"logps/rejected": -223.5568084716797, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04091322422027588, |
|
"rewards/margins": 0.07021255791187286, |
|
"rewards/margins_max": 0.10776461660861969, |
|
"rewards/margins_min": 0.03266051039099693, |
|
"rewards/margins_std": 0.05310662463307381, |
|
"rewards/rejected": -0.029299337416887283, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 4.130718528195303e-07, |
|
"logits/chosen": -1.4879382848739624, |
|
"logits/rejected": -1.0252676010131836, |
|
"logps/chosen": -229.93917846679688, |
|
"logps/rejected": -225.00491333007812, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.041905976831912994, |
|
"rewards/margins": 0.06251799315214157, |
|
"rewards/margins_max": 0.08764694631099701, |
|
"rewards/margins_min": 0.03738904744386673, |
|
"rewards/margins_std": 0.035537708550691605, |
|
"rewards/rejected": -0.020612016320228577, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 4.109776117889789e-07, |
|
"logits/chosen": -1.371626853942871, |
|
"logits/rejected": -0.9644553065299988, |
|
"logps/chosen": -256.90826416015625, |
|
"logps/rejected": -262.3277282714844, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.04058977589011192, |
|
"rewards/margins": 0.058307357132434845, |
|
"rewards/margins_max": 0.07639677822589874, |
|
"rewards/margins_min": 0.04021793603897095, |
|
"rewards/margins_std": 0.025582294911146164, |
|
"rewards/rejected": -0.017717575654387474, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 4.088638927549016e-07, |
|
"logits/chosen": -1.4024112224578857, |
|
"logits/rejected": -1.0277204513549805, |
|
"logps/chosen": -257.0721740722656, |
|
"logps/rejected": -223.2074432373047, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.033399466425180435, |
|
"rewards/margins": 0.05190381407737732, |
|
"rewards/margins_max": 0.07284527271986008, |
|
"rewards/margins_min": 0.03096235729753971, |
|
"rewards/margins_std": 0.029615694656968117, |
|
"rewards/rejected": -0.018504345789551735, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 4.067309514735267e-07, |
|
"logits/chosen": -1.2835520505905151, |
|
"logits/rejected": -0.9591856002807617, |
|
"logps/chosen": -253.2421417236328, |
|
"logps/rejected": -214.3036651611328, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03112444281578064, |
|
"rewards/margins": 0.06105799227952957, |
|
"rewards/margins_max": 0.08269943296909332, |
|
"rewards/margins_min": 0.03941655158996582, |
|
"rewards/margins_std": 0.030605623498558998, |
|
"rewards/rejected": -0.02993355132639408, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 4.045790460269395e-07, |
|
"logits/chosen": -1.29916250705719, |
|
"logits/rejected": -0.9579310417175293, |
|
"logps/chosen": -222.2379608154297, |
|
"logps/rejected": -203.0634307861328, |
|
"loss": 0.667, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03449912741780281, |
|
"rewards/margins": 0.04771226644515991, |
|
"rewards/margins_max": 0.0739569216966629, |
|
"rewards/margins_min": 0.02146761119365692, |
|
"rewards/margins_std": 0.03711555153131485, |
|
"rewards/rejected": -0.0132131427526474, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 4.02408436791856e-07, |
|
"logits/chosen": -1.3718782663345337, |
|
"logits/rejected": -1.0133472681045532, |
|
"logps/chosen": -234.8833770751953, |
|
"logps/rejected": -237.4645233154297, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03908390551805496, |
|
"rewards/margins": 0.06387855857610703, |
|
"rewards/margins_max": 0.0912180095911026, |
|
"rewards/margins_min": 0.03653910756111145, |
|
"rewards/margins_std": 0.038663819432258606, |
|
"rewards/rejected": -0.02479465678334236, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 4.0021938640811717e-07, |
|
"logits/chosen": -1.3344662189483643, |
|
"logits/rejected": -0.9591034054756165, |
|
"logps/chosen": -221.8365478515625, |
|
"logps/rejected": -358.8748779296875, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03558691591024399, |
|
"rewards/margins": 0.06812174618244171, |
|
"rewards/margins_max": 0.09799469262361526, |
|
"rewards/margins_min": 0.03824879601597786, |
|
"rewards/margins_std": 0.042246729135513306, |
|
"rewards/rejected": -0.032534826546907425, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 3.980121597469095e-07, |
|
"logits/chosen": -1.4173529148101807, |
|
"logits/rejected": -1.046112298965454, |
|
"logps/chosen": -222.1094970703125, |
|
"logps/rejected": -195.96484375, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02532361075282097, |
|
"rewards/margins": 0.04526478797197342, |
|
"rewards/margins_max": 0.06393333524465561, |
|
"rewards/margins_min": 0.02659623883664608, |
|
"rewards/margins_std": 0.02640131488442421, |
|
"rewards/rejected": -0.0199411790817976, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 3.9578702387871735e-07, |
|
"logits/chosen": -1.4770991802215576, |
|
"logits/rejected": -1.0594831705093384, |
|
"logps/chosen": -200.45314025878906, |
|
"logps/rejected": -181.73829650878906, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03991563245654106, |
|
"rewards/margins": 0.04643087834119797, |
|
"rewards/margins_max": 0.0705905631184578, |
|
"rewards/margins_min": 0.022271184250712395, |
|
"rewards/margins_std": 0.034166961908340454, |
|
"rewards/rejected": -0.00651524355635047, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 3.9354424804100647e-07, |
|
"logits/chosen": -1.3302786350250244, |
|
"logits/rejected": -1.0419865846633911, |
|
"logps/chosen": -180.52911376953125, |
|
"logps/rejected": -229.23507690429688, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03989443928003311, |
|
"rewards/margins": 0.05144830420613289, |
|
"rewards/margins_max": 0.07513656467199326, |
|
"rewards/margins_min": 0.02776004932820797, |
|
"rewards/margins_std": 0.03350025415420532, |
|
"rewards/rejected": -0.0115538714453578, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 3.9128410360564793e-07, |
|
"logits/chosen": -1.4453057050704956, |
|
"logits/rejected": -0.843630313873291, |
|
"logps/chosen": -239.1511993408203, |
|
"logps/rejected": -228.0477294921875, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0347132682800293, |
|
"rewards/margins": 0.05644859001040459, |
|
"rewards/margins_max": 0.07778388261795044, |
|
"rewards/margins_min": 0.035113297402858734, |
|
"rewards/margins_std": 0.03017266094684601, |
|
"rewards/rejected": -0.02173532173037529, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 3.8900686404608174e-07, |
|
"logits/chosen": -1.4021894931793213, |
|
"logits/rejected": -1.1501901149749756, |
|
"logps/chosen": -246.96676635742188, |
|
"logps/rejected": -243.79055786132812, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02853170968592167, |
|
"rewards/margins": 0.05541776865720749, |
|
"rewards/margins_max": 0.0840243324637413, |
|
"rewards/margins_min": 0.026811202988028526, |
|
"rewards/margins_std": 0.04045579582452774, |
|
"rewards/rejected": -0.02688606083393097, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 3.8671280490422753e-07, |
|
"logits/chosen": -1.4860647916793823, |
|
"logits/rejected": -1.2145134210586548, |
|
"logps/chosen": -173.87155151367188, |
|
"logps/rejected": -215.08114624023438, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03185255080461502, |
|
"rewards/margins": 0.0523541197180748, |
|
"rewards/margins_max": 0.07217199355363846, |
|
"rewards/margins_min": 0.032536253333091736, |
|
"rewards/margins_std": 0.028026703745126724, |
|
"rewards/rejected": -0.020501574501395226, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 3.8440220375714435e-07, |
|
"logits/chosen": -1.4330469369888306, |
|
"logits/rejected": -0.923498809337616, |
|
"logps/chosen": -194.24989318847656, |
|
"logps/rejected": -188.0842742919922, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.02217457816004753, |
|
"rewards/margins": 0.05012967437505722, |
|
"rewards/margins_max": 0.07560008764266968, |
|
"rewards/margins_min": 0.02465927042067051, |
|
"rewards/margins_std": 0.03602059185504913, |
|
"rewards/rejected": -0.02795509621500969, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 3.8207534018344434e-07, |
|
"logits/chosen": -1.4624649286270142, |
|
"logits/rejected": -1.2272025346755981, |
|
"logps/chosen": -224.9335479736328, |
|
"logps/rejected": -215.4106903076172, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02788296714425087, |
|
"rewards/margins": 0.05390559881925583, |
|
"rewards/margins_max": 0.07784163951873779, |
|
"rewards/margins_min": 0.02996954880654812, |
|
"rewards/margins_std": 0.033850688487291336, |
|
"rewards/rejected": -0.02602263353765011, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 3.797324957294643e-07, |
|
"logits/chosen": -1.4522289037704468, |
|
"logits/rejected": -1.0942248106002808, |
|
"logps/chosen": -197.66709899902344, |
|
"logps/rejected": -188.29644775390625, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.028640951961278915, |
|
"rewards/margins": 0.04304185137152672, |
|
"rewards/margins_max": 0.06764715909957886, |
|
"rewards/margins_min": 0.01843653805553913, |
|
"rewards/margins_std": 0.034797169268131256, |
|
"rewards/rejected": -0.014400901272892952, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 3.773739538751988e-07, |
|
"logits/chosen": -1.4544618129730225, |
|
"logits/rejected": -1.0294139385223389, |
|
"logps/chosen": -245.71435546875, |
|
"logps/rejected": -208.09115600585938, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.032376714050769806, |
|
"rewards/margins": 0.04516047239303589, |
|
"rewards/margins_max": 0.06522423774003983, |
|
"rewards/margins_min": 0.025096703320741653, |
|
"rewards/margins_std": 0.028374452143907547, |
|
"rewards/rejected": -0.012783756479620934, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -1.39158034324646, |
|
"logits/rejected": -0.9538629651069641, |
|
"logps/chosen": -339.5301513671875, |
|
"logps/rejected": -214.0384521484375, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03498070687055588, |
|
"rewards/margins": 0.05068878084421158, |
|
"rewards/margins_max": 0.07239842414855957, |
|
"rewards/margins_min": 0.028979141265153885, |
|
"rewards/margins_std": 0.030702069401741028, |
|
"rewards/rejected": -0.015708070248365402, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 3.7261092134804695e-07, |
|
"logits/chosen": -1.313458800315857, |
|
"logits/rejected": -0.9937132000923157, |
|
"logps/chosen": -205.0299835205078, |
|
"logps/rejected": -212.155029296875, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03651620075106621, |
|
"rewards/margins": 0.05705242604017258, |
|
"rewards/margins_max": 0.08746035397052765, |
|
"rewards/margins_min": 0.026644494384527206, |
|
"rewards/margins_std": 0.043003302067518234, |
|
"rewards/rejected": -0.020536217838525772, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 3.702070069935898e-07, |
|
"logits/chosen": -1.4626922607421875, |
|
"logits/rejected": -1.015981674194336, |
|
"logps/chosen": -227.63339233398438, |
|
"logps/rejected": -221.4516143798828, |
|
"loss": 0.6682, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03474265709519386, |
|
"rewards/margins": 0.051122285425662994, |
|
"rewards/margins_max": 0.06716804951429367, |
|
"rewards/margins_min": 0.03507651016116142, |
|
"rewards/margins_std": 0.02269214577972889, |
|
"rewards/rejected": -0.016379622742533684, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 3.6778854780597213e-07, |
|
"logits/chosen": -1.2919328212738037, |
|
"logits/rejected": -0.9956780672073364, |
|
"logps/chosen": -222.3484344482422, |
|
"logps/rejected": -182.62179565429688, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.030011823400855064, |
|
"rewards/margins": 0.0488753467798233, |
|
"rewards/margins_max": 0.06740256398916245, |
|
"rewards/margins_min": 0.030348125845193863, |
|
"rewards/margins_std": 0.02620144747197628, |
|
"rewards/rejected": -0.01886352151632309, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 3.653558364144363e-07, |
|
"logits/chosen": -1.4199802875518799, |
|
"logits/rejected": -1.1749187707901, |
|
"logps/chosen": -182.4161376953125, |
|
"logps/rejected": -217.2281951904297, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.03831896930932999, |
|
"rewards/margins": 0.053058166056871414, |
|
"rewards/margins_max": 0.07967302948236465, |
|
"rewards/margins_min": 0.026443298906087875, |
|
"rewards/margins_std": 0.03763909637928009, |
|
"rewards/rejected": -0.014739197678864002, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 3.629091671727159e-07, |
|
"logits/chosen": -1.383264422416687, |
|
"logits/rejected": -0.935562252998352, |
|
"logps/chosen": -236.8832550048828, |
|
"logps/rejected": -232.43701171875, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03460818529129028, |
|
"rewards/margins": 0.05942409485578537, |
|
"rewards/margins_max": 0.08626364171504974, |
|
"rewards/margins_min": 0.0325845405459404, |
|
"rewards/margins_std": 0.037956852465867996, |
|
"rewards/rejected": -0.02481590211391449, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 3.6044883612341957e-07, |
|
"logits/chosen": -1.4922215938568115, |
|
"logits/rejected": -1.210303544998169, |
|
"logps/chosen": -175.39468383789062, |
|
"logps/rejected": -175.10372924804688, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0377926230430603, |
|
"rewards/margins": 0.044999849051237106, |
|
"rewards/margins_max": 0.06272000819444656, |
|
"rewards/margins_min": 0.027279695495963097, |
|
"rewards/margins_std": 0.025060083717107773, |
|
"rewards/rejected": -0.007207226939499378, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 3.5797514096221024e-07, |
|
"logits/chosen": -1.447775959968567, |
|
"logits/rejected": -1.1010136604309082, |
|
"logps/chosen": -233.7635040283203, |
|
"logps/rejected": -213.46658325195312, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.032312069088220596, |
|
"rewards/margins": 0.052999138832092285, |
|
"rewards/margins_max": 0.07665625959634781, |
|
"rewards/margins_min": 0.02934201993048191, |
|
"rewards/margins_std": 0.033456217497587204, |
|
"rewards/rejected": -0.02068706974387169, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 3.554883810017844e-07, |
|
"logits/chosen": -1.3156002759933472, |
|
"logits/rejected": -1.0745857954025269, |
|
"logps/chosen": -181.6421661376953, |
|
"logps/rejected": -183.23080444335938, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.030048031359910965, |
|
"rewards/margins": 0.04210250452160835, |
|
"rewards/margins_max": 0.07046877592802048, |
|
"rewards/margins_min": 0.013736230321228504, |
|
"rewards/margins_std": 0.04011595994234085, |
|
"rewards/rejected": -0.01205446757376194, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 3.529888571356561e-07, |
|
"logits/chosen": -1.2578437328338623, |
|
"logits/rejected": -1.0070809125900269, |
|
"logps/chosen": -250.3462677001953, |
|
"logps/rejected": -232.718994140625, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.026622626930475235, |
|
"rewards/margins": 0.04009575396776199, |
|
"rewards/margins_max": 0.06229530647397041, |
|
"rewards/margins_min": 0.017896197736263275, |
|
"rewards/margins_std": 0.03139491006731987, |
|
"rewards/rejected": -0.01347312517464161, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 3.50476871801749e-07, |
|
"logits/chosen": -1.374895453453064, |
|
"logits/rejected": -0.9742172956466675, |
|
"logps/chosen": -298.19110107421875, |
|
"logps/rejected": -209.47329711914062, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03527087718248367, |
|
"rewards/margins": 0.04785536974668503, |
|
"rewards/margins_max": 0.06894843280315399, |
|
"rewards/margins_min": 0.026762310415506363, |
|
"rewards/margins_std": 0.029830092564225197, |
|
"rewards/rejected": -0.012584498152136803, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 3.479527289458021e-07, |
|
"logits/chosen": -1.3711079359054565, |
|
"logits/rejected": -1.0774781703948975, |
|
"logps/chosen": -184.34344482421875, |
|
"logps/rejected": -224.1437225341797, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03007492795586586, |
|
"rewards/margins": 0.05908045917749405, |
|
"rewards/margins_max": 0.09251175820827484, |
|
"rewards/margins_min": 0.02564915083348751, |
|
"rewards/margins_std": 0.04727901145815849, |
|
"rewards/rejected": -0.029005536809563637, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 3.4541673398459315e-07, |
|
"logits/chosen": -1.293668508529663, |
|
"logits/rejected": -1.0986145734786987, |
|
"logps/chosen": -209.5894012451172, |
|
"logps/rejected": -229.1302490234375, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0236887875944376, |
|
"rewards/margins": 0.04496780037879944, |
|
"rewards/margins_max": 0.06106124445796013, |
|
"rewards/margins_min": 0.028874356299638748, |
|
"rewards/margins_std": 0.0227595716714859, |
|
"rewards/rejected": -0.021279016509652138, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 3.4286919376898303e-07, |
|
"logits/chosen": -1.2458115816116333, |
|
"logits/rejected": -0.9769574403762817, |
|
"logps/chosen": -219.8367919921875, |
|
"logps/rejected": -227.66421508789062, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.028372962027788162, |
|
"rewards/margins": 0.04673437029123306, |
|
"rewards/margins_max": 0.06330729275941849, |
|
"rewards/margins_min": 0.030161460861563683, |
|
"rewards/margins_std": 0.02343764156103134, |
|
"rewards/rejected": -0.018361413851380348, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 3.403104165467883e-07, |
|
"logits/chosen": -1.3929589986801147, |
|
"logits/rejected": -1.1880546808242798, |
|
"logps/chosen": -276.8525085449219, |
|
"logps/rejected": -241.208740234375, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.03430411219596863, |
|
"rewards/margins": 0.04882946237921715, |
|
"rewards/margins_max": 0.08031658828258514, |
|
"rewards/margins_min": 0.017342329025268555, |
|
"rewards/margins_std": 0.04452953487634659, |
|
"rewards/rejected": -0.014525346457958221, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 3.377407119254826e-07, |
|
"logits/chosen": -1.307857632637024, |
|
"logits/rejected": -0.973365306854248, |
|
"logps/chosen": -262.1522521972656, |
|
"logps/rejected": -219.1666717529297, |
|
"loss": 0.6699, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03059108555316925, |
|
"rewards/margins": 0.05130365490913391, |
|
"rewards/margins_max": 0.07557855546474457, |
|
"rewards/margins_min": 0.027028745040297508, |
|
"rewards/margins_std": 0.03432989865541458, |
|
"rewards/rejected": -0.02071256935596466, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 3.351603908347359e-07, |
|
"logits/chosen": -1.3961646556854248, |
|
"logits/rejected": -1.0634922981262207, |
|
"logps/chosen": -244.14907836914062, |
|
"logps/rejected": -209.52413940429688, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.02553880773484707, |
|
"rewards/margins": 0.039372727274894714, |
|
"rewards/margins_max": 0.05578699707984924, |
|
"rewards/margins_min": 0.022958464920520782, |
|
"rewards/margins_std": 0.023213278502225876, |
|
"rewards/rejected": -0.013833923265337944, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 3.325697654887918e-07, |
|
"logits/chosen": -1.457953929901123, |
|
"logits/rejected": -1.1763416528701782, |
|
"logps/chosen": -168.73855590820312, |
|
"logps/rejected": -200.1396026611328, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.039046648889780045, |
|
"rewards/margins": 0.061664480715990067, |
|
"rewards/margins_max": 0.08563290536403656, |
|
"rewards/margins_min": 0.03769605979323387, |
|
"rewards/margins_std": 0.03389647603034973, |
|
"rewards/rejected": -0.02261783741414547, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 3.2996914934869034e-07, |
|
"logits/chosen": -1.4136825799942017, |
|
"logits/rejected": -0.9438567161560059, |
|
"logps/chosen": -211.27880859375, |
|
"logps/rejected": -251.1080322265625, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.05148143321275711, |
|
"rewards/margins": 0.05926315858960152, |
|
"rewards/margins_max": 0.0925159901380539, |
|
"rewards/margins_min": 0.026010334491729736, |
|
"rewards/margins_std": 0.04702659696340561, |
|
"rewards/rejected": -0.00778172304853797, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 3.273588570843399e-07, |
|
"logits/chosen": -1.3561222553253174, |
|
"logits/rejected": -0.8794288635253906, |
|
"logps/chosen": -219.59188842773438, |
|
"logps/rejected": -204.20651245117188, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.04018976539373398, |
|
"rewards/margins": 0.058758050203323364, |
|
"rewards/margins_max": 0.08319707214832306, |
|
"rewards/margins_min": 0.034319035708904266, |
|
"rewards/margins_std": 0.034561995416879654, |
|
"rewards/rejected": -0.018568288534879684, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 3.2473920453644254e-07, |
|
"logits/chosen": -1.364458680152893, |
|
"logits/rejected": -1.1189966201782227, |
|
"logps/chosen": -200.58279418945312, |
|
"logps/rejected": -247.4306182861328, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03194325789809227, |
|
"rewards/margins": 0.058095790445804596, |
|
"rewards/margins_max": 0.08179818838834763, |
|
"rewards/margins_min": 0.03439338877797127, |
|
"rewards/margins_std": 0.03352025896310806, |
|
"rewards/rejected": -0.02615252695977688, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 3.2211050867827805e-07, |
|
"logits/chosen": -1.4114757776260376, |
|
"logits/rejected": -1.0227770805358887, |
|
"logps/chosen": -217.49783325195312, |
|
"logps/rejected": -270.8158874511719, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03752985596656799, |
|
"rewards/margins": 0.06543248146772385, |
|
"rewards/margins_max": 0.08699898421764374, |
|
"rewards/margins_min": 0.043865982443094254, |
|
"rewards/margins_std": 0.030499637126922607, |
|
"rewards/rejected": -0.02790263295173645, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 3.194730875773504e-07, |
|
"logits/chosen": -1.3351142406463623, |
|
"logits/rejected": -1.0667884349822998, |
|
"logps/chosen": -226.33425903320312, |
|
"logps/rejected": -211.48983764648438, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03304114192724228, |
|
"rewards/margins": 0.047557245939970016, |
|
"rewards/margins_max": 0.06921641528606415, |
|
"rewards/margins_min": 0.02589806541800499, |
|
"rewards/margins_std": 0.030630702152848244, |
|
"rewards/rejected": -0.014516102150082588, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 3.168272603569025e-07, |
|
"logits/chosen": -1.4025719165802002, |
|
"logits/rejected": -0.8659202456474304, |
|
"logps/chosen": -255.092529296875, |
|
"logps/rejected": -191.5826416015625, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.037762049585580826, |
|
"rewards/margins": 0.05839651823043823, |
|
"rewards/margins_max": 0.08473102748394012, |
|
"rewards/margins_min": 0.03206200897693634, |
|
"rewards/margins_std": 0.03724262863397598, |
|
"rewards/rejected": -0.020634472370147705, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 3.1417334715730257e-07, |
|
"logits/chosen": -1.312922716140747, |
|
"logits/rejected": -0.9928410649299622, |
|
"logps/chosen": -274.3824768066406, |
|
"logps/rejected": -207.8001251220703, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03649697080254555, |
|
"rewards/margins": 0.04787913337349892, |
|
"rewards/margins_max": 0.07063382118940353, |
|
"rewards/margins_min": 0.025124436244368553, |
|
"rewards/margins_std": 0.032179996371269226, |
|
"rewards/rejected": -0.01138215884566307, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 3.115116690973081e-07, |
|
"logits/chosen": -1.275967001914978, |
|
"logits/rejected": -1.0719497203826904, |
|
"logps/chosen": -170.84716796875, |
|
"logps/rejected": -187.09201049804688, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.030098671093583107, |
|
"rewards/margins": 0.04616239666938782, |
|
"rewards/margins_max": 0.06867832690477371, |
|
"rewards/margins_min": 0.023646462708711624, |
|
"rewards/margins_std": 0.03184233605861664, |
|
"rewards/rejected": -0.01606372371315956, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.375, |
|
"learning_rate": 3.088425482352106e-07, |
|
"logits/chosen": -1.3329031467437744, |
|
"logits/rejected": -0.9551903009414673, |
|
"logps/chosen": -178.49220275878906, |
|
"logps/rejected": -163.50289916992188, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.02671188674867153, |
|
"rewards/margins": 0.05042758584022522, |
|
"rewards/margins_max": 0.07536738365888596, |
|
"rewards/margins_min": 0.025487786158919334, |
|
"rewards/margins_std": 0.03527020663022995, |
|
"rewards/rejected": -0.023715700954198837, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 3.061663075298675e-07, |
|
"logits/chosen": -1.5138485431671143, |
|
"logits/rejected": -1.1314074993133545, |
|
"logps/chosen": -250.61813354492188, |
|
"logps/rejected": -272.20379638671875, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03775627166032791, |
|
"rewards/margins": 0.05153984948992729, |
|
"rewards/margins_max": 0.07664564251899719, |
|
"rewards/margins_min": 0.026434045284986496, |
|
"rewards/margins_std": 0.035504959523677826, |
|
"rewards/rejected": -0.013783574104309082, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 3.034832708016243e-07, |
|
"logits/chosen": -1.5145914554595947, |
|
"logits/rejected": -1.0713765621185303, |
|
"logps/chosen": -261.14312744140625, |
|
"logps/rejected": -210.29232788085938, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03341571241617203, |
|
"rewards/margins": 0.048537809401750565, |
|
"rewards/margins_max": 0.07006336748600006, |
|
"rewards/margins_min": 0.027012262493371964, |
|
"rewards/margins_std": 0.030441725626587868, |
|
"rewards/rejected": -0.01512210350483656, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 3.0079376269313354e-07, |
|
"logits/chosen": -1.4111496210098267, |
|
"logits/rejected": -1.072613000869751, |
|
"logps/chosen": -207.9450225830078, |
|
"logps/rejected": -267.03912353515625, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.029351189732551575, |
|
"rewards/margins": 0.05201409012079239, |
|
"rewards/margins_max": 0.07417653501033783, |
|
"rewards/margins_min": 0.0298516396433115, |
|
"rewards/margins_std": 0.031342435628175735, |
|
"rewards/rejected": -0.022662896662950516, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 2.9809810863007284e-07, |
|
"logits/chosen": -1.4359506368637085, |
|
"logits/rejected": -1.0733433961868286, |
|
"logps/chosen": -200.97647094726562, |
|
"logps/rejected": -209.4395751953125, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.039248835295438766, |
|
"rewards/margins": 0.04961882531642914, |
|
"rewards/margins_max": 0.0718330442905426, |
|
"rewards/margins_min": 0.027404606342315674, |
|
"rewards/margins_std": 0.031415652483701706, |
|
"rewards/rejected": -0.010369991883635521, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 2.9539663478176946e-07, |
|
"logits/chosen": -1.2646214962005615, |
|
"logits/rejected": -1.1139628887176514, |
|
"logps/chosen": -206.5272674560547, |
|
"logps/rejected": -250.39108276367188, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02430318295955658, |
|
"rewards/margins": 0.051103752106428146, |
|
"rewards/margins_max": 0.0777682214975357, |
|
"rewards/margins_min": 0.024439293891191483, |
|
"rewards/margins_std": 0.03770923987030983, |
|
"rewards/rejected": -0.026800569146871567, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 2.9268966802173436e-07, |
|
"logits/chosen": -1.3860819339752197, |
|
"logits/rejected": -0.975805938243866, |
|
"logps/chosen": -270.6651611328125, |
|
"logps/rejected": -221.06259155273438, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03461884707212448, |
|
"rewards/margins": 0.05071113631129265, |
|
"rewards/margins_max": 0.0760193020105362, |
|
"rewards/margins_min": 0.025402987375855446, |
|
"rewards/margins_std": 0.03579113632440567, |
|
"rewards/rejected": -0.016092294827103615, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 2.89977535888111e-07, |
|
"logits/chosen": -1.3565785884857178, |
|
"logits/rejected": -0.9915903210639954, |
|
"logps/chosen": -177.0413055419922, |
|
"logps/rejected": -182.9870147705078, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03303280100226402, |
|
"rewards/margins": 0.06585012376308441, |
|
"rewards/margins_max": 0.08999715745449066, |
|
"rewards/margins_min": 0.04170309379696846, |
|
"rewards/margins_std": 0.034149058163166046, |
|
"rewards/rejected": -0.03281732648611069, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 2.872605665440436e-07, |
|
"logits/chosen": -1.3481905460357666, |
|
"logits/rejected": -1.1729605197906494, |
|
"logps/chosen": -169.9842529296875, |
|
"logps/rejected": -223.30044555664062, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.032422084361314774, |
|
"rewards/margins": 0.055260200053453445, |
|
"rewards/margins_max": 0.07780520617961884, |
|
"rewards/margins_min": 0.03271518647670746, |
|
"rewards/margins_std": 0.03188345581293106, |
|
"rewards/rejected": -0.02283811755478382, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 2.845390887379706e-07, |
|
"logits/chosen": -1.4345109462738037, |
|
"logits/rejected": -1.1150403022766113, |
|
"logps/chosen": -225.3082275390625, |
|
"logps/rejected": -199.63519287109375, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.034576646983623505, |
|
"rewards/margins": 0.04932091385126114, |
|
"rewards/margins_max": 0.06937690079212189, |
|
"rewards/margins_min": 0.029264941811561584, |
|
"rewards/margins_std": 0.028363442048430443, |
|
"rewards/rejected": -0.014744272455573082, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 2.8181343176384585e-07, |
|
"logits/chosen": -1.2172272205352783, |
|
"logits/rejected": -1.0032122135162354, |
|
"logps/chosen": -194.42764282226562, |
|
"logps/rejected": -336.8403015136719, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.02610202692449093, |
|
"rewards/margins": 0.06659840792417526, |
|
"rewards/margins_max": 0.09039248526096344, |
|
"rewards/margins_min": 0.04280433803796768, |
|
"rewards/margins_std": 0.03364989906549454, |
|
"rewards/rejected": -0.04049638658761978, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 2.7908392542129537e-07, |
|
"logits/chosen": -1.491234540939331, |
|
"logits/rejected": -1.1456706523895264, |
|
"logps/chosen": -226.4430694580078, |
|
"logps/rejected": -264.64874267578125, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.036219272762537, |
|
"rewards/margins": 0.06396204233169556, |
|
"rewards/margins_max": 0.0869758352637291, |
|
"rewards/margins_min": 0.040948253124952316, |
|
"rewards/margins_std": 0.032546427100896835, |
|
"rewards/rejected": -0.02774277701973915, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 2.763508999757119e-07, |
|
"logits/chosen": -1.4049649238586426, |
|
"logits/rejected": -1.239553689956665, |
|
"logps/chosen": -215.4875030517578, |
|
"logps/rejected": -298.31365966796875, |
|
"loss": 0.6683, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03639969974756241, |
|
"rewards/margins": 0.053609687834978104, |
|
"rewards/margins_max": 0.07144194096326828, |
|
"rewards/margins_min": 0.035777442157268524, |
|
"rewards/margins_std": 0.025218605995178223, |
|
"rewards/rejected": -0.017209986224770546, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 2.7361468611829326e-07, |
|
"logits/chosen": -1.4899475574493408, |
|
"logits/rejected": -1.128447413444519, |
|
"logps/chosen": -200.3207550048828, |
|
"logps/rejected": -228.01718139648438, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03625740110874176, |
|
"rewards/margins": 0.0598982498049736, |
|
"rewards/margins_max": 0.09039153158664703, |
|
"rewards/margins_min": 0.029404977336525917, |
|
"rewards/margins_std": 0.043124008923769, |
|
"rewards/rejected": -0.02364085428416729, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.708756149260292e-07, |
|
"logits/chosen": -1.4126758575439453, |
|
"logits/rejected": -1.0123107433319092, |
|
"logps/chosen": -235.05734252929688, |
|
"logps/rejected": -203.85006713867188, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.034583888947963715, |
|
"rewards/margins": 0.052448056638240814, |
|
"rewards/margins_max": 0.07766715437173843, |
|
"rewards/margins_min": 0.027228962630033493, |
|
"rewards/margins_std": 0.03566519170999527, |
|
"rewards/rejected": -0.0178641676902771, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 2.681340178216423e-07, |
|
"logits/chosen": -1.6247339248657227, |
|
"logits/rejected": -1.223256230354309, |
|
"logps/chosen": -237.5697784423828, |
|
"logps/rejected": -252.75521850585938, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03515109419822693, |
|
"rewards/margins": 0.06105039268732071, |
|
"rewards/margins_max": 0.08089162409305573, |
|
"rewards/margins_min": 0.041209153831005096, |
|
"rewards/margins_std": 0.028059745207428932, |
|
"rewards/rejected": -0.02589929662644863, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 2.6539022653348575e-07, |
|
"logits/chosen": -1.3141326904296875, |
|
"logits/rejected": -0.9784961938858032, |
|
"logps/chosen": -204.03591918945312, |
|
"logps/rejected": -265.62591552734375, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03488199785351753, |
|
"rewards/margins": 0.06790916621685028, |
|
"rewards/margins_max": 0.09974372386932373, |
|
"rewards/margins_min": 0.03607460856437683, |
|
"rewards/margins_std": 0.04502086713910103, |
|
"rewards/rejected": -0.03302717208862305, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 2.62644573055405e-07, |
|
"logits/chosen": -1.527411699295044, |
|
"logits/rejected": -1.0853965282440186, |
|
"logps/chosen": -193.60665893554688, |
|
"logps/rejected": -200.410888671875, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.037782810628414154, |
|
"rewards/margins": 0.06309525668621063, |
|
"rewards/margins_max": 0.0949065238237381, |
|
"rewards/margins_min": 0.031283993273973465, |
|
"rewards/margins_std": 0.04498792067170143, |
|
"rewards/rejected": -0.02531243860721588, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 2.598973896065674e-07, |
|
"logits/chosen": -1.1190847158432007, |
|
"logits/rejected": -0.9498281478881836, |
|
"logps/chosen": -246.06240844726562, |
|
"logps/rejected": -278.57708740234375, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.029680589213967323, |
|
"rewards/margins": 0.06372956186532974, |
|
"rewards/margins_max": 0.08478715270757675, |
|
"rewards/margins_min": 0.04267194867134094, |
|
"rewards/margins_std": 0.029779959470033646, |
|
"rewards/rejected": -0.03404896706342697, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 2.571490085912638e-07, |
|
"logits/chosen": -1.294392704963684, |
|
"logits/rejected": -0.901209831237793, |
|
"logps/chosen": -222.6404571533203, |
|
"logps/rejected": -221.46646118164062, |
|
"loss": 0.666, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.035451389849185944, |
|
"rewards/margins": 0.06020699813961983, |
|
"rewards/margins_max": 0.08351422101259232, |
|
"rewards/margins_min": 0.036899782717227936, |
|
"rewards/margins_std": 0.03296138346195221, |
|
"rewards/rejected": -0.024755608290433884, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 2.5439976255868846e-07, |
|
"logits/chosen": -1.3172805309295654, |
|
"logits/rejected": -0.9587199091911316, |
|
"logps/chosen": -201.81642150878906, |
|
"logps/rejected": -264.8630065917969, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02492239698767662, |
|
"rewards/margins": 0.06449567526578903, |
|
"rewards/margins_max": 0.10114102065563202, |
|
"rewards/margins_min": 0.02785031870007515, |
|
"rewards/margins_std": 0.051824361085891724, |
|
"rewards/rejected": -0.03957327455282211, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 2.5164998416270137e-07, |
|
"logits/chosen": -1.4752823114395142, |
|
"logits/rejected": -1.1924030780792236, |
|
"logps/chosen": -225.65927124023438, |
|
"logps/rejected": -236.69290161132812, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.025519024580717087, |
|
"rewards/margins": 0.05898071080446243, |
|
"rewards/margins_max": 0.09197360277175903, |
|
"rewards/margins_min": 0.025987815111875534, |
|
"rewards/margins_std": 0.04665899649262428, |
|
"rewards/rejected": -0.033461686223745346, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 2.489000061215775e-07, |
|
"logits/chosen": -1.3754206895828247, |
|
"logits/rejected": -1.0634129047393799, |
|
"logps/chosen": -212.5056915283203, |
|
"logps/rejected": -217.0105438232422, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03384281322360039, |
|
"rewards/margins": 0.05164814740419388, |
|
"rewards/margins_max": 0.0746842697262764, |
|
"rewards/margins_min": 0.02861202321946621, |
|
"rewards/margins_std": 0.0325779989361763, |
|
"rewards/rejected": -0.01780533231794834, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 2.461501611777483e-07, |
|
"logits/chosen": -1.3263044357299805, |
|
"logits/rejected": -1.0537205934524536, |
|
"logps/chosen": -197.9228973388672, |
|
"logps/rejected": -214.32839965820312, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.030653411522507668, |
|
"rewards/margins": 0.04631539434194565, |
|
"rewards/margins_max": 0.06874962151050568, |
|
"rewards/margins_min": 0.023881174623966217, |
|
"rewards/margins_std": 0.03172678127884865, |
|
"rewards/rejected": -0.01566198468208313, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 2.4340078205754096e-07, |
|
"logits/chosen": -1.4674514532089233, |
|
"logits/rejected": -1.0580947399139404, |
|
"logps/chosen": -228.774169921875, |
|
"logps/rejected": -245.3206329345703, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03198526054620743, |
|
"rewards/margins": 0.060487449169158936, |
|
"rewards/margins_max": 0.09252621978521347, |
|
"rewards/margins_min": 0.028448667377233505, |
|
"rewards/margins_std": 0.045309677720069885, |
|
"rewards/rejected": -0.02850218489766121, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 2.406522014309186e-07, |
|
"logits/chosen": -1.3413441181182861, |
|
"logits/rejected": -1.0260752439498901, |
|
"logps/chosen": -217.0348358154297, |
|
"logps/rejected": -218.7316436767578, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03381979838013649, |
|
"rewards/margins": 0.05491740256547928, |
|
"rewards/margins_max": 0.08126216381788254, |
|
"rewards/margins_min": 0.028572645038366318, |
|
"rewards/margins_std": 0.037257120013237, |
|
"rewards/rejected": -0.021097611635923386, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 2.3790475187122832e-07, |
|
"logits/chosen": -1.3534529209136963, |
|
"logits/rejected": -1.0642507076263428, |
|
"logps/chosen": -203.16989135742188, |
|
"logps/rejected": -185.0489044189453, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03119819238781929, |
|
"rewards/margins": 0.058540262281894684, |
|
"rewards/margins_max": 0.0830526053905487, |
|
"rewards/margins_min": 0.034027911722660065, |
|
"rewards/margins_std": 0.034665681421756744, |
|
"rewards/rejected": -0.027342066168785095, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 2.351587658149598e-07, |
|
"logits/chosen": -1.453975796699524, |
|
"logits/rejected": -0.9396857023239136, |
|
"logps/chosen": -307.119140625, |
|
"logps/rejected": -293.79193115234375, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04417193681001663, |
|
"rewards/margins": 0.06028149649500847, |
|
"rewards/margins_max": 0.08487708121538162, |
|
"rewards/margins_min": 0.03568592667579651, |
|
"rewards/margins_std": 0.03478339686989784, |
|
"rewards/rejected": -0.016109565272927284, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 2.3241457552152187e-07, |
|
"logits/chosen": -1.2886158227920532, |
|
"logits/rejected": -0.8535853624343872, |
|
"logps/chosen": -255.9151153564453, |
|
"logps/rejected": -190.72183227539062, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0356779471039772, |
|
"rewards/margins": 0.06389064341783524, |
|
"rewards/margins_max": 0.09344568848609924, |
|
"rewards/margins_min": 0.034335602074861526, |
|
"rewards/margins_std": 0.04179714247584343, |
|
"rewards/rejected": -0.028212696313858032, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 2.2967251303303876e-07, |
|
"logits/chosen": -1.2967920303344727, |
|
"logits/rejected": -1.069603443145752, |
|
"logps/chosen": -174.32562255859375, |
|
"logps/rejected": -198.73556518554688, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02710595726966858, |
|
"rewards/margins": 0.04836275056004524, |
|
"rewards/margins_max": 0.07030778378248215, |
|
"rewards/margins_min": 0.02641770802438259, |
|
"rewards/margins_std": 0.03103497065603733, |
|
"rewards/rejected": -0.021256795153021812, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 2.2693291013417452e-07, |
|
"logits/chosen": -1.3830006122589111, |
|
"logits/rejected": -1.131734848022461, |
|
"logps/chosen": -196.27232360839844, |
|
"logps/rejected": -220.3488311767578, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.04211854934692383, |
|
"rewards/margins": 0.054630208760499954, |
|
"rewards/margins_max": 0.08319230377674103, |
|
"rewards/margins_min": 0.026068110018968582, |
|
"rewards/margins_std": 0.0403929129242897, |
|
"rewards/rejected": -0.012511657550930977, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 2.2419609831198695e-07, |
|
"logits/chosen": -1.314412236213684, |
|
"logits/rejected": -1.0906130075454712, |
|
"logps/chosen": -202.8844451904297, |
|
"logps/rejected": -282.2475280761719, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.029452290385961533, |
|
"rewards/margins": 0.06307311356067657, |
|
"rewards/margins_max": 0.08669252693653107, |
|
"rewards/margins_min": 0.03945370018482208, |
|
"rewards/margins_std": 0.03340289741754532, |
|
"rewards/rejected": -0.03362082317471504, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 2.2146240871581875e-07, |
|
"logits/chosen": -1.4870127439498901, |
|
"logits/rejected": -1.10221529006958, |
|
"logps/chosen": -257.47381591796875, |
|
"logps/rejected": -300.7210388183594, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.048544105142354965, |
|
"rewards/margins": 0.06802462041378021, |
|
"rewards/margins_max": 0.0930628627538681, |
|
"rewards/margins_min": 0.04298638552427292, |
|
"rewards/margins_std": 0.035409413278102875, |
|
"rewards/rejected": -0.019480522722005844, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 2.187321721172288e-07, |
|
"logits/chosen": -1.2666473388671875, |
|
"logits/rejected": -0.9587362408638, |
|
"logps/chosen": -202.96151733398438, |
|
"logps/rejected": -188.11402893066406, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.035519860684871674, |
|
"rewards/margins": 0.06863918900489807, |
|
"rewards/margins_max": 0.10405266284942627, |
|
"rewards/margins_min": 0.033225707709789276, |
|
"rewards/margins_std": 0.05008222907781601, |
|
"rewards/rejected": -0.0331193283200264, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 2.1600571886996932e-07, |
|
"logits/chosen": -1.409246563911438, |
|
"logits/rejected": -0.9662661552429199, |
|
"logps/chosen": -255.17337036132812, |
|
"logps/rejected": -237.2165069580078, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.034895267337560654, |
|
"rewards/margins": 0.060319460928440094, |
|
"rewards/margins_max": 0.08776156604290009, |
|
"rewards/margins_min": 0.032877348363399506, |
|
"rewards/margins_std": 0.038808997720479965, |
|
"rewards/rejected": -0.025424188002943993, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 2.1328337887001386e-07, |
|
"logits/chosen": -1.3689050674438477, |
|
"logits/rejected": -0.9174262881278992, |
|
"logps/chosen": -250.42257690429688, |
|
"logps/rejected": -213.65115356445312, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.03616097569465637, |
|
"rewards/margins": 0.0674886554479599, |
|
"rewards/margins_max": 0.09155096858739853, |
|
"rewards/margins_min": 0.04342634230852127, |
|
"rewards/margins_std": 0.03402925282716751, |
|
"rewards/rejected": -0.03132767975330353, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 2.105654815156406e-07, |
|
"logits/chosen": -1.2773230075836182, |
|
"logits/rejected": -0.9415411949157715, |
|
"logps/chosen": -211.5564727783203, |
|
"logps/rejected": -241.5669403076172, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03792757913470268, |
|
"rewards/margins": 0.05873064324259758, |
|
"rewards/margins_max": 0.08351272344589233, |
|
"rewards/margins_min": 0.03394855558872223, |
|
"rewards/margins_std": 0.03504716232419014, |
|
"rewards/rejected": -0.0208030603826046, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 2.0785235566757517e-07, |
|
"logits/chosen": -1.5174918174743652, |
|
"logits/rejected": -1.0792747735977173, |
|
"logps/chosen": -274.3040466308594, |
|
"logps/rejected": -269.9195556640625, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.031049564480781555, |
|
"rewards/margins": 0.04908784478902817, |
|
"rewards/margins_max": 0.07297800481319427, |
|
"rewards/margins_min": 0.02519768849015236, |
|
"rewards/margins_std": 0.033785782754421234, |
|
"rewards/rejected": -0.01803828403353691, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 2.0514432960919976e-07, |
|
"logits/chosen": -1.3264081478118896, |
|
"logits/rejected": -0.8952063322067261, |
|
"logps/chosen": -275.90582275390625, |
|
"logps/rejected": -227.85183715820312, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.035590268671512604, |
|
"rewards/margins": 0.05433149263262749, |
|
"rewards/margins_max": 0.08622786402702332, |
|
"rewards/margins_min": 0.022435134276747704, |
|
"rewards/margins_std": 0.04510827362537384, |
|
"rewards/rejected": -0.01874123141169548, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 2.024417310068309e-07, |
|
"logits/chosen": -1.3526580333709717, |
|
"logits/rejected": -1.0428838729858398, |
|
"logps/chosen": -242.9093475341797, |
|
"logps/rejected": -221.0670928955078, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03770860657095909, |
|
"rewards/margins": 0.06553932279348373, |
|
"rewards/margins_max": 0.09631849825382233, |
|
"rewards/margins_min": 0.03476015478372574, |
|
"rewards/margins_std": 0.04352831840515137, |
|
"rewards/rejected": -0.027830716222524643, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 1.9974488687007272e-07, |
|
"logits/chosen": -1.321537733078003, |
|
"logits/rejected": -0.9563083648681641, |
|
"logps/chosen": -189.53338623046875, |
|
"logps/rejected": -208.65695190429688, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.037261709570884705, |
|
"rewards/margins": 0.050910621881484985, |
|
"rewards/margins_max": 0.08092696219682693, |
|
"rewards/margins_min": 0.020894277840852737, |
|
"rewards/margins_std": 0.042449526488780975, |
|
"rewards/rejected": -0.01364891231060028, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 1.9705412351224935e-07, |
|
"logits/chosen": -1.341074824333191, |
|
"logits/rejected": -1.031362533569336, |
|
"logps/chosen": -262.0687561035156, |
|
"logps/rejected": -209.2541046142578, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04073809087276459, |
|
"rewards/margins": 0.06763813644647598, |
|
"rewards/margins_max": 0.10079771280288696, |
|
"rewards/margins_min": 0.034478556364774704, |
|
"rewards/margins_std": 0.04689472168684006, |
|
"rewards/rejected": -0.026900043711066246, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 1.9436976651092142e-07, |
|
"logits/chosen": -1.4449079036712646, |
|
"logits/rejected": -1.0441436767578125, |
|
"logps/chosen": -323.22515869140625, |
|
"logps/rejected": -259.187744140625, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04286254942417145, |
|
"rewards/margins": 0.06869898736476898, |
|
"rewards/margins_max": 0.08886998146772385, |
|
"rewards/margins_min": 0.048527974635362625, |
|
"rewards/margins_std": 0.028526106849312782, |
|
"rewards/rejected": -0.025836432352662086, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.9169214066849198e-07, |
|
"logits/chosen": -1.3310493230819702, |
|
"logits/rejected": -1.0039780139923096, |
|
"logps/chosen": -207.80368041992188, |
|
"logps/rejected": -217.77279663085938, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.029799357056617737, |
|
"rewards/margins": 0.05040057748556137, |
|
"rewards/margins_max": 0.07911469042301178, |
|
"rewards/margins_min": 0.021686479449272156, |
|
"rewards/margins_std": 0.04060788080096245, |
|
"rewards/rejected": -0.02060122787952423, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.890215699729057e-07, |
|
"logits/chosen": -1.3599677085876465, |
|
"logits/rejected": -0.952431321144104, |
|
"logps/chosen": -220.8314971923828, |
|
"logps/rejected": -218.5143280029297, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03455578535795212, |
|
"rewards/margins": 0.059498321264982224, |
|
"rewards/margins_max": 0.08264943957328796, |
|
"rewards/margins_min": 0.03634720668196678, |
|
"rewards/margins_std": 0.032740626484155655, |
|
"rewards/rejected": -0.024942539632320404, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.8635837755844736e-07, |
|
"logits/chosen": -1.5396320819854736, |
|
"logits/rejected": -1.1135740280151367, |
|
"logps/chosen": -192.1985321044922, |
|
"logps/rejected": -189.65496826171875, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04031743109226227, |
|
"rewards/margins": 0.06330820918083191, |
|
"rewards/margins_max": 0.09154955297708511, |
|
"rewards/margins_min": 0.035066869109869, |
|
"rewards/margins_std": 0.03993929177522659, |
|
"rewards/rejected": -0.02299078181385994, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 1.837028856666426e-07, |
|
"logits/chosen": -1.396333932876587, |
|
"logits/rejected": -1.0482286214828491, |
|
"logps/chosen": -223.5980987548828, |
|
"logps/rejected": -197.462646484375, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03402381017804146, |
|
"rewards/margins": 0.05975471809506416, |
|
"rewards/margins_max": 0.09332195669412613, |
|
"rewards/margins_min": 0.026187485083937645, |
|
"rewards/margins_std": 0.04747123643755913, |
|
"rewards/rejected": -0.025730907917022705, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.8105541560726783e-07, |
|
"logits/chosen": -1.5116699934005737, |
|
"logits/rejected": -1.005076289176941, |
|
"logps/chosen": -216.2085418701172, |
|
"logps/rejected": -199.5402374267578, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.037188541144132614, |
|
"rewards/margins": 0.05942929536104202, |
|
"rewards/margins_max": 0.08686941862106323, |
|
"rewards/margins_min": 0.03198916092514992, |
|
"rewards/margins_std": 0.038806211203336716, |
|
"rewards/rejected": -0.02224075235426426, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.7841628771947186e-07, |
|
"logits/chosen": -1.4040260314941406, |
|
"logits/rejected": -0.965591549873352, |
|
"logps/chosen": -234.39431762695312, |
|
"logps/rejected": -202.01571655273438, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03427529335021973, |
|
"rewards/margins": 0.0503767728805542, |
|
"rewards/margins_max": 0.07114100456237793, |
|
"rewards/margins_min": 0.02961254119873047, |
|
"rewards/margins_std": 0.02936505898833275, |
|
"rewards/rejected": -0.016101477667689323, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.757858213330157e-07, |
|
"logits/chosen": -1.1877460479736328, |
|
"logits/rejected": -0.9582545161247253, |
|
"logps/chosen": -229.884033203125, |
|
"logps/rejected": -281.41351318359375, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03942031413316727, |
|
"rewards/margins": 0.06542594730854034, |
|
"rewards/margins_max": 0.09780795872211456, |
|
"rewards/margins_min": 0.03304394707083702, |
|
"rewards/margins_std": 0.04579506441950798, |
|
"rewards/rejected": -0.026005636900663376, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 1.7316433472963426e-07, |
|
"logits/chosen": -1.507406234741211, |
|
"logits/rejected": -1.1749341487884521, |
|
"logps/chosen": -281.5582580566406, |
|
"logps/rejected": -243.66110229492188, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03884587436914444, |
|
"rewards/margins": 0.059391576796770096, |
|
"rewards/margins_max": 0.08674292266368866, |
|
"rewards/margins_min": 0.032040227204561234, |
|
"rewards/margins_std": 0.03868064284324646, |
|
"rewards/rejected": -0.02054569497704506, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 1.7055214510452458e-07, |
|
"logits/chosen": -1.3578734397888184, |
|
"logits/rejected": -0.849805474281311, |
|
"logps/chosen": -331.993408203125, |
|
"logps/rejected": -279.07733154296875, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03726685792207718, |
|
"rewards/margins": 0.06775570660829544, |
|
"rewards/margins_max": 0.09739796817302704, |
|
"rewards/margins_min": 0.03811345621943474, |
|
"rewards/margins_std": 0.04192047566175461, |
|
"rewards/rejected": -0.03048885427415371, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.6794956852796616e-07, |
|
"logits/chosen": -1.421799659729004, |
|
"logits/rejected": -1.0734702348709106, |
|
"logps/chosen": -214.08364868164062, |
|
"logps/rejected": -222.42636108398438, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.046803176403045654, |
|
"rewards/margins": 0.07683407515287399, |
|
"rewards/margins_max": 0.11319296061992645, |
|
"rewards/margins_min": 0.040475185960531235, |
|
"rewards/margins_std": 0.05141923576593399, |
|
"rewards/rejected": -0.03003089688718319, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 1.653569199070764e-07, |
|
"logits/chosen": -1.437723994255066, |
|
"logits/rejected": -1.0029988288879395, |
|
"logps/chosen": -206.7332000732422, |
|
"logps/rejected": -232.79580688476562, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.04119989275932312, |
|
"rewards/margins": 0.06901855766773224, |
|
"rewards/margins_max": 0.10958409309387207, |
|
"rewards/margins_min": 0.028453027829527855, |
|
"rewards/margins_std": 0.05736833065748215, |
|
"rewards/rejected": -0.027818670496344566, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 1.6277451294770832e-07, |
|
"logits/chosen": -1.427294135093689, |
|
"logits/rejected": -1.043678641319275, |
|
"logps/chosen": -173.60861206054688, |
|
"logps/rejected": -159.7396697998047, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03490697965025902, |
|
"rewards/margins": 0.05194821208715439, |
|
"rewards/margins_max": 0.0755261555314064, |
|
"rewards/margins_min": 0.028370272368192673, |
|
"rewards/margins_std": 0.03334423899650574, |
|
"rewards/rejected": -0.01704123243689537, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 1.6020266011649176e-07, |
|
"logits/chosen": -1.3484151363372803, |
|
"logits/rejected": -0.9436542391777039, |
|
"logps/chosen": -246.00296020507812, |
|
"logps/rejected": -232.9607391357422, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03393206372857094, |
|
"rewards/margins": 0.06296433508396149, |
|
"rewards/margins_max": 0.09208185970783234, |
|
"rewards/margins_min": 0.03384682536125183, |
|
"rewards/margins_std": 0.0411783829331398, |
|
"rewards/rejected": -0.029032278805971146, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.5764167260302608e-07, |
|
"logits/chosen": -1.269598364830017, |
|
"logits/rejected": -1.101138949394226, |
|
"logps/chosen": -212.4265594482422, |
|
"logps/rejected": -261.7176208496094, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.02871812880039215, |
|
"rewards/margins": 0.059116750955581665, |
|
"rewards/margins_max": 0.081370510160923, |
|
"rewards/margins_min": 0.03686298802495003, |
|
"rewards/margins_std": 0.03147156536579132, |
|
"rewards/rejected": -0.030398612841963768, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.5509186028222653e-07, |
|
"logits/chosen": -1.3609730005264282, |
|
"logits/rejected": -0.8888334035873413, |
|
"logps/chosen": -240.88809204101562, |
|
"logps/rejected": -205.7561492919922, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.036828793585300446, |
|
"rewards/margins": 0.06859615445137024, |
|
"rewards/margins_max": 0.09640248119831085, |
|
"rewards/margins_min": 0.040789827704429626, |
|
"rewards/margins_std": 0.0393240861594677, |
|
"rewards/rejected": -0.031767360866069794, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.5255353167683017e-07, |
|
"logits/chosen": -1.4757276773452759, |
|
"logits/rejected": -1.0737035274505615, |
|
"logps/chosen": -197.3357696533203, |
|
"logps/rejected": -193.08956909179688, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03849588334560394, |
|
"rewards/margins": 0.049884069710969925, |
|
"rewards/margins_max": 0.07029401510953903, |
|
"rewards/margins_min": 0.02947412058711052, |
|
"rewards/margins_std": 0.028864026069641113, |
|
"rewards/rejected": -0.01138819195330143, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 1.500269939200648e-07, |
|
"logits/chosen": -1.4104186296463013, |
|
"logits/rejected": -1.1364113092422485, |
|
"logps/chosen": -180.714111328125, |
|
"logps/rejected": -193.08792114257812, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03277165815234184, |
|
"rewards/margins": 0.04854750260710716, |
|
"rewards/margins_max": 0.07237715274095535, |
|
"rewards/margins_min": 0.02471785433590412, |
|
"rewards/margins_std": 0.03370020538568497, |
|
"rewards/rejected": -0.01577584072947502, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.4751255271848661e-07, |
|
"logits/chosen": -1.3990291357040405, |
|
"logits/rejected": -1.111859917640686, |
|
"logps/chosen": -191.26333618164062, |
|
"logps/rejected": -209.4487762451172, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03463239595293999, |
|
"rewards/margins": 0.05561947077512741, |
|
"rewards/margins_max": 0.08030703663825989, |
|
"rewards/margins_min": 0.03093191422522068, |
|
"rewards/margins_std": 0.03491348773241043, |
|
"rewards/rejected": -0.02098708227276802, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 1.450105123149904e-07, |
|
"logits/chosen": -1.3517110347747803, |
|
"logits/rejected": -0.8976603746414185, |
|
"logps/chosen": -236.5410614013672, |
|
"logps/rejected": -285.66143798828125, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04675716906785965, |
|
"rewards/margins": 0.0757768452167511, |
|
"rewards/margins_max": 0.1105475053191185, |
|
"rewards/margins_min": 0.0410061851143837, |
|
"rewards/margins_std": 0.04917313903570175, |
|
"rewards/rejected": -0.029019678011536598, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.4252117545199638e-07, |
|
"logits/chosen": -1.2252193689346313, |
|
"logits/rejected": -1.2452119588851929, |
|
"logps/chosen": -129.21884155273438, |
|
"logps/rejected": -187.29981994628906, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.02311699464917183, |
|
"rewards/margins": 0.053245484828948975, |
|
"rewards/margins_max": 0.08089035749435425, |
|
"rewards/margins_min": 0.025600602850317955, |
|
"rewards/margins_std": 0.03909575939178467, |
|
"rewards/rejected": -0.030128484591841698, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 1.400448433348191e-07, |
|
"logits/chosen": -1.3551205396652222, |
|
"logits/rejected": -1.0361279249191284, |
|
"logps/chosen": -181.05245971679688, |
|
"logps/rejected": -190.93905639648438, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.033756453543901443, |
|
"rewards/margins": 0.05258417874574661, |
|
"rewards/margins_max": 0.08322058618068695, |
|
"rewards/margins_min": 0.02194777876138687, |
|
"rewards/margins_std": 0.04332640767097473, |
|
"rewards/rejected": -0.01882772520184517, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.3758181559522219e-07, |
|
"logits/chosen": -1.3742306232452393, |
|
"logits/rejected": -1.1042159795761108, |
|
"logps/chosen": -195.7826690673828, |
|
"logps/rejected": -224.00357055664062, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.039305493235588074, |
|
"rewards/margins": 0.054510366171598434, |
|
"rewards/margins_max": 0.07698939740657806, |
|
"rewards/margins_min": 0.032031331211328506, |
|
"rewards/margins_std": 0.031790152192115784, |
|
"rewards/rejected": -0.015204873867332935, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 1.351323902551631e-07, |
|
"logits/chosen": -1.423339605331421, |
|
"logits/rejected": -1.0979268550872803, |
|
"logps/chosen": -188.20086669921875, |
|
"logps/rejected": -208.48483276367188, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.036755792796611786, |
|
"rewards/margins": 0.06653173267841339, |
|
"rewards/margins_max": 0.10358710587024689, |
|
"rewards/margins_min": 0.029476355761289597, |
|
"rewards/margins_std": 0.052404217422008514, |
|
"rewards/rejected": -0.029775941744446754, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 1.3269686369073347e-07, |
|
"logits/chosen": -1.4356403350830078, |
|
"logits/rejected": -0.9359350204467773, |
|
"logps/chosen": -255.5299530029297, |
|
"logps/rejected": -220.5718536376953, |
|
"loss": 0.663, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03550455719232559, |
|
"rewards/margins": 0.07294157147407532, |
|
"rewards/margins_max": 0.11041506379842758, |
|
"rewards/margins_min": 0.03546806797385216, |
|
"rewards/margins_std": 0.052995532751083374, |
|
"rewards/rejected": -0.037437014281749725, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 1.3027553059629776e-07, |
|
"logits/chosen": -1.270801305770874, |
|
"logits/rejected": -0.9209572076797485, |
|
"logps/chosen": -203.37147521972656, |
|
"logps/rejected": -237.0596160888672, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03659834340214729, |
|
"rewards/margins": 0.07047709822654724, |
|
"rewards/margins_max": 0.10837771743535995, |
|
"rewards/margins_min": 0.03257646784186363, |
|
"rewards/margins_std": 0.05359958857297897, |
|
"rewards/rejected": -0.03387875854969025, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.2786868394883615e-07, |
|
"logits/chosen": -1.3924726247787476, |
|
"logits/rejected": -0.9072662591934204, |
|
"logps/chosen": -237.67532348632812, |
|
"logps/rejected": -171.44007873535156, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.03645704686641693, |
|
"rewards/margins": 0.04906289279460907, |
|
"rewards/margins_max": 0.07811780273914337, |
|
"rewards/margins_min": 0.02000797912478447, |
|
"rewards/margins_std": 0.041089847683906555, |
|
"rewards/rejected": -0.012605843134224415, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 1.2547661497249423e-07, |
|
"logits/chosen": -1.505576491355896, |
|
"logits/rejected": -1.0931254625320435, |
|
"logps/chosen": -251.4204559326172, |
|
"logps/rejected": -184.33187866210938, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03499855846166611, |
|
"rewards/margins": 0.060498736798763275, |
|
"rewards/margins_max": 0.09210414439439774, |
|
"rewards/margins_min": 0.028893321752548218, |
|
"rewards/margins_std": 0.04469680041074753, |
|
"rewards/rejected": -0.025500169023871422, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.2309961310334608e-07, |
|
"logits/chosen": -1.381753921508789, |
|
"logits/rejected": -1.0234613418579102, |
|
"logps/chosen": -209.87673950195312, |
|
"logps/rejected": -193.29415893554688, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.036537621170282364, |
|
"rewards/margins": 0.05814922973513603, |
|
"rewards/margins_max": 0.0908384695649147, |
|
"rewards/margins_min": 0.025459999218583107, |
|
"rewards/margins_std": 0.04622955992817879, |
|
"rewards/rejected": -0.021611608564853668, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 1.207379659543726e-07, |
|
"logits/chosen": -1.5136375427246094, |
|
"logits/rejected": -1.0719817876815796, |
|
"logps/chosen": -235.4477081298828, |
|
"logps/rejected": -190.52899169921875, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04006263613700867, |
|
"rewards/margins": 0.05348850414156914, |
|
"rewards/margins_max": 0.07529211789369583, |
|
"rewards/margins_min": 0.03168489784002304, |
|
"rewards/margins_std": 0.03083496168255806, |
|
"rewards/rejected": -0.013425871729850769, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 1.1839195928066101e-07, |
|
"logits/chosen": -1.5472790002822876, |
|
"logits/rejected": -1.063508152961731, |
|
"logps/chosen": -237.460205078125, |
|
"logps/rejected": -203.92752075195312, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03670421242713928, |
|
"rewards/margins": 0.0651477798819542, |
|
"rewards/margins_max": 0.09266404807567596, |
|
"rewards/margins_min": 0.03763151913881302, |
|
"rewards/margins_std": 0.038913875818252563, |
|
"rewards/rejected": -0.02844356931746006, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 1.1606187694482895e-07, |
|
"logits/chosen": -1.3274108171463013, |
|
"logits/rejected": -1.0006046295166016, |
|
"logps/chosen": -341.37298583984375, |
|
"logps/rejected": -298.43218994140625, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03555456921458244, |
|
"rewards/margins": 0.0692644715309143, |
|
"rewards/margins_max": 0.09813406318426132, |
|
"rewards/margins_min": 0.04039488732814789, |
|
"rewards/margins_std": 0.040827758610248566, |
|
"rewards/rejected": -0.03370990604162216, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 1.1374800088267766e-07, |
|
"logits/chosen": -1.3964722156524658, |
|
"logits/rejected": -0.8625639081001282, |
|
"logps/chosen": -256.6228332519531, |
|
"logps/rejected": -204.37188720703125, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03464067354798317, |
|
"rewards/margins": 0.06588082760572433, |
|
"rewards/margins_max": 0.09385097026824951, |
|
"rewards/margins_min": 0.03791068494319916, |
|
"rewards/margins_std": 0.03955575078725815, |
|
"rewards/rejected": -0.031240154057741165, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 1.1145061106907803e-07, |
|
"logits/chosen": -1.3579143285751343, |
|
"logits/rejected": -1.1530735492706299, |
|
"logps/chosen": -213.7913055419922, |
|
"logps/rejected": -274.8453674316406, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.039796892553567886, |
|
"rewards/margins": 0.07231783121824265, |
|
"rewards/margins_max": 0.1056450754404068, |
|
"rewards/margins_min": 0.03899059444665909, |
|
"rewards/margins_std": 0.04713182896375656, |
|
"rewards/rejected": -0.03252093866467476, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.0916998548409447e-07, |
|
"logits/chosen": -1.2776060104370117, |
|
"logits/rejected": -1.0304553508758545, |
|
"logps/chosen": -208.4978790283203, |
|
"logps/rejected": -255.5095977783203, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03622225672006607, |
|
"rewards/margins": 0.06858749687671661, |
|
"rewards/margins_max": 0.09841950237751007, |
|
"rewards/margins_min": 0.03875547647476196, |
|
"rewards/margins_std": 0.04218883812427521, |
|
"rewards/rejected": -0.032365236431360245, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 1.0690640007934978e-07, |
|
"logits/chosen": -1.365751028060913, |
|
"logits/rejected": -0.8165037035942078, |
|
"logps/chosen": -263.61102294921875, |
|
"logps/rejected": -221.7294158935547, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.036759573966264725, |
|
"rewards/margins": 0.057599522173404694, |
|
"rewards/margins_max": 0.08515635877847672, |
|
"rewards/margins_min": 0.030042681843042374, |
|
"rewards/margins_std": 0.03897125646471977, |
|
"rewards/rejected": -0.020839953795075417, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 1.0466012874463507e-07, |
|
"logits/chosen": -1.2811259031295776, |
|
"logits/rejected": -0.9887920618057251, |
|
"logps/chosen": -267.3749694824219, |
|
"logps/rejected": -244.70596313476562, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.038502246141433716, |
|
"rewards/margins": 0.060044266283512115, |
|
"rewards/margins_max": 0.08822239935398102, |
|
"rewards/margins_min": 0.03186614066362381, |
|
"rewards/margins_std": 0.039849892258644104, |
|
"rewards/rejected": -0.02154202200472355, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 1.0243144327477013e-07, |
|
"logits/chosen": -1.4756540060043335, |
|
"logits/rejected": -0.9919270277023315, |
|
"logps/chosen": -223.4065704345703, |
|
"logps/rejected": -209.97573852539062, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04816528037190437, |
|
"rewards/margins": 0.07530733942985535, |
|
"rewards/margins_max": 0.10730701684951782, |
|
"rewards/margins_min": 0.04330766201019287, |
|
"rewards/margins_std": 0.04525437951087952, |
|
"rewards/rejected": -0.027142059057950974, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 1.0022061333671647e-07, |
|
"logits/chosen": -1.3365637063980103, |
|
"logits/rejected": -0.9453974962234497, |
|
"logps/chosen": -221.6447296142578, |
|
"logps/rejected": -205.4340057373047, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.043263550847768784, |
|
"rewards/margins": 0.07088526338338852, |
|
"rewards/margins_max": 0.09706144034862518, |
|
"rewards/margins_min": 0.044709086418151855, |
|
"rewards/margins_std": 0.037018708884716034, |
|
"rewards/rejected": -0.027621712535619736, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 9.802790643694817e-08, |
|
"logits/chosen": -1.3576759099960327, |
|
"logits/rejected": -1.1886638402938843, |
|
"logps/chosen": -196.93856811523438, |
|
"logps/rejected": -203.70106506347656, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03112906776368618, |
|
"rewards/margins": 0.05759000778198242, |
|
"rewards/margins_max": 0.08904091268777847, |
|
"rewards/margins_min": 0.02613910473883152, |
|
"rewards/margins_std": 0.044478293508291245, |
|
"rewards/rejected": -0.026460934430360794, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 9.585358788908393e-08, |
|
"logits/chosen": -1.386399745941162, |
|
"logits/rejected": -1.065953254699707, |
|
"logps/chosen": -228.66220092773438, |
|
"logps/rejected": -250.2444610595703, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.030235329642891884, |
|
"rewards/margins": 0.05980142205953598, |
|
"rewards/margins_max": 0.09089671075344086, |
|
"rewards/margins_min": 0.028706133365631104, |
|
"rewards/margins_std": 0.043975379317998886, |
|
"rewards/rejected": -0.029566094279289246, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 9.36979207817849e-08, |
|
"logits/chosen": -1.5047038793563843, |
|
"logits/rejected": -1.2480775117874146, |
|
"logps/chosen": -239.8202667236328, |
|
"logps/rejected": -234.4571533203125, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.032885629683732986, |
|
"rewards/margins": 0.05206092447042465, |
|
"rewards/margins_max": 0.07507045567035675, |
|
"rewards/margins_min": 0.029051411896944046, |
|
"rewards/margins_std": 0.032540373504161835, |
|
"rewards/rejected": -0.019175300374627113, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 9.156116594692096e-08, |
|
"logits/chosen": -1.4589383602142334, |
|
"logits/rejected": -0.9495819807052612, |
|
"logps/chosen": -231.2331085205078, |
|
"logps/rejected": -212.32382202148438, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04424898326396942, |
|
"rewards/margins": 0.06708662211894989, |
|
"rewards/margins_max": 0.09897585213184357, |
|
"rewards/margins_min": 0.03519739955663681, |
|
"rewards/margins_std": 0.04509817436337471, |
|
"rewards/rejected": -0.02283763512969017, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.375, |
|
"learning_rate": 8.944358192801102e-08, |
|
"logits/chosen": -1.4549717903137207, |
|
"logits/rejected": -0.9532996416091919, |
|
"logps/chosen": -222.93148803710938, |
|
"logps/rejected": -191.50634765625, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.042102448642253876, |
|
"rewards/margins": 0.07653506100177765, |
|
"rewards/margins_max": 0.11154161393642426, |
|
"rewards/margins_min": 0.041528504341840744, |
|
"rewards/margins_std": 0.04950674995779991, |
|
"rewards/rejected": -0.034432608634233475, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 8.734542494893954e-08, |
|
"logits/chosen": -1.492494821548462, |
|
"logits/rejected": -1.2444711923599243, |
|
"logps/chosen": -219.95266723632812, |
|
"logps/rejected": -268.90484619140625, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.04024102911353111, |
|
"rewards/margins": 0.06013556569814682, |
|
"rewards/margins_max": 0.08759422600269318, |
|
"rewards/margins_min": 0.03267688676714897, |
|
"rewards/margins_std": 0.03883242979645729, |
|
"rewards/rejected": -0.01989452913403511, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 8.526694888295355e-08, |
|
"logits/chosen": -1.3630679845809937, |
|
"logits/rejected": -1.0612514019012451, |
|
"logps/chosen": -223.59716796875, |
|
"logps/rejected": -237.7313690185547, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03609809651970863, |
|
"rewards/margins": 0.06568726152181625, |
|
"rewards/margins_max": 0.08733747154474258, |
|
"rewards/margins_min": 0.04403705149888992, |
|
"rewards/margins_std": 0.03061802126467228, |
|
"rewards/rejected": -0.02958916500210762, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 8.320840522194505e-08, |
|
"logits/chosen": -1.3517181873321533, |
|
"logits/rejected": -1.105916142463684, |
|
"logps/chosen": -233.48831176757812, |
|
"logps/rejected": -236.3004913330078, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03493565320968628, |
|
"rewards/margins": 0.061103563755750656, |
|
"rewards/margins_max": 0.09413080662488937, |
|
"rewards/margins_min": 0.02807632088661194, |
|
"rewards/margins_std": 0.046707578003406525, |
|
"rewards/rejected": -0.026167908683419228, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 8.117004304602052e-08, |
|
"logits/chosen": -1.4049303531646729, |
|
"logits/rejected": -0.988071620464325, |
|
"logps/chosen": -274.993896484375, |
|
"logps/rejected": -221.76278686523438, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.037282317876815796, |
|
"rewards/margins": 0.049982473254203796, |
|
"rewards/margins_max": 0.07475082576274872, |
|
"rewards/margins_min": 0.025214115157723427, |
|
"rewards/margins_std": 0.035027749836444855, |
|
"rewards/rejected": -0.01270015724003315, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 7.915210899336283e-08, |
|
"logits/chosen": -1.5335876941680908, |
|
"logits/rejected": -1.1939712762832642, |
|
"logps/chosen": -214.1549530029297, |
|
"logps/rejected": -259.2396545410156, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0516619011759758, |
|
"rewards/margins": 0.06498047709465027, |
|
"rewards/margins_max": 0.09797366708517075, |
|
"rewards/margins_min": 0.03198728710412979, |
|
"rewards/margins_std": 0.046659428626298904, |
|
"rewards/rejected": -0.013318580575287342, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 7.715484723038837e-08, |
|
"logits/chosen": -1.1930948495864868, |
|
"logits/rejected": -0.938764750957489, |
|
"logps/chosen": -220.21621704101562, |
|
"logps/rejected": -254.71420288085938, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.027883481234312057, |
|
"rewards/margins": 0.060532040894031525, |
|
"rewards/margins_max": 0.0814305990934372, |
|
"rewards/margins_min": 0.03963347524404526, |
|
"rewards/margins_std": 0.029555032029747963, |
|
"rewards/rejected": -0.03264855593442917, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 7.517849942220348e-08, |
|
"logits/chosen": -1.288425087928772, |
|
"logits/rejected": -0.9016556739807129, |
|
"logps/chosen": -207.7607421875, |
|
"logps/rejected": -215.00808715820312, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.033341165632009506, |
|
"rewards/margins": 0.05306249111890793, |
|
"rewards/margins_max": 0.07986272126436234, |
|
"rewards/margins_min": 0.026262247934937477, |
|
"rewards/margins_std": 0.03790125995874405, |
|
"rewards/rejected": -0.019721319898962975, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 7.322330470336313e-08, |
|
"logits/chosen": -1.3114441633224487, |
|
"logits/rejected": -1.1315343379974365, |
|
"logps/chosen": -204.32998657226562, |
|
"logps/rejected": -197.68760681152344, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.023209819570183754, |
|
"rewards/margins": 0.04553220421075821, |
|
"rewards/margins_max": 0.06928315758705139, |
|
"rewards/margins_min": 0.021781256422400475, |
|
"rewards/margins_std": 0.033588919788599014, |
|
"rewards/rejected": -0.022322386503219604, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 7.128949964893646e-08, |
|
"logits/chosen": -1.4030101299285889, |
|
"logits/rejected": -1.0203847885131836, |
|
"logps/chosen": -246.3531951904297, |
|
"logps/rejected": -231.5150604248047, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.041862308979034424, |
|
"rewards/margins": 0.06798645108938217, |
|
"rewards/margins_max": 0.09953634440898895, |
|
"rewards/margins_min": 0.036436546593904495, |
|
"rewards/margins_std": 0.04461830109357834, |
|
"rewards/rejected": -0.026124143972992897, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 6.937731824588141e-08, |
|
"logits/chosen": -1.3225687742233276, |
|
"logits/rejected": -1.2012965679168701, |
|
"logps/chosen": -161.27560424804688, |
|
"logps/rejected": -162.04849243164062, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.024264657869935036, |
|
"rewards/margins": 0.04363849759101868, |
|
"rewards/margins_max": 0.06468725949525833, |
|
"rewards/margins_min": 0.02258973941206932, |
|
"rewards/margins_std": 0.029767444357275963, |
|
"rewards/rejected": -0.01937383972108364, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 6.74869918647325e-08, |
|
"logits/chosen": -1.2273991107940674, |
|
"logits/rejected": -0.8869683146476746, |
|
"logps/chosen": -242.3751678466797, |
|
"logps/rejected": -222.62265014648438, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.04028186947107315, |
|
"rewards/margins": 0.05239185690879822, |
|
"rewards/margins_max": 0.07009953260421753, |
|
"rewards/margins_min": 0.03468417376279831, |
|
"rewards/margins_std": 0.025042440742254257, |
|
"rewards/rejected": -0.012109987437725067, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 6.56187492316059e-08, |
|
"logits/chosen": -1.3965575695037842, |
|
"logits/rejected": -0.9450374841690063, |
|
"logps/chosen": -220.7981414794922, |
|
"logps/rejected": -155.75204467773438, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02550414577126503, |
|
"rewards/margins": 0.06219423562288284, |
|
"rewards/margins_max": 0.08597894012928009, |
|
"rewards/margins_min": 0.0384095273911953, |
|
"rewards/margins_std": 0.033636655658483505, |
|
"rewards/rejected": -0.036690086126327515, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 6.377281640052357e-08, |
|
"logits/chosen": -1.5471882820129395, |
|
"logits/rejected": -1.1804416179656982, |
|
"logps/chosen": -192.26565551757812, |
|
"logps/rejected": -246.218994140625, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.04850774258375168, |
|
"rewards/margins": 0.06347064673900604, |
|
"rewards/margins_max": 0.10149389505386353, |
|
"rewards/margins_min": 0.025447404012084007, |
|
"rewards/margins_std": 0.05377299338579178, |
|
"rewards/rejected": -0.014962906017899513, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 6.19494167260613e-08, |
|
"logits/chosen": -1.425964117050171, |
|
"logits/rejected": -1.0960302352905273, |
|
"logps/chosen": -184.11727905273438, |
|
"logps/rejected": -191.51913452148438, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0385814905166626, |
|
"rewards/margins": 0.061642616987228394, |
|
"rewards/margins_max": 0.09265581518411636, |
|
"rewards/margins_min": 0.030629415065050125, |
|
"rewards/margins_std": 0.04385928437113762, |
|
"rewards/rejected": -0.023061122745275497, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 6.01487708363232e-08, |
|
"logits/chosen": -1.4187657833099365, |
|
"logits/rejected": -1.0462344884872437, |
|
"logps/chosen": -231.49960327148438, |
|
"logps/rejected": -250.50973510742188, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04294178634881973, |
|
"rewards/margins": 0.06284011900424957, |
|
"rewards/margins_max": 0.0833154022693634, |
|
"rewards/margins_min": 0.042364828288555145, |
|
"rewards/margins_std": 0.02895643189549446, |
|
"rewards/rejected": -0.01989833451807499, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 5.837109660624606e-08, |
|
"logits/chosen": -1.3851536512374878, |
|
"logits/rejected": -1.0157699584960938, |
|
"logps/chosen": -226.1177978515625, |
|
"logps/rejected": -238.81539916992188, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03696604073047638, |
|
"rewards/margins": 0.06656143069267273, |
|
"rewards/margins_max": 0.0883278027176857, |
|
"rewards/margins_min": 0.04479505866765976, |
|
"rewards/margins_std": 0.030782291665673256, |
|
"rewards/rejected": -0.02959538996219635, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 5.6616609131236725e-08, |
|
"logits/chosen": -1.5234705209732056, |
|
"logits/rejected": -1.249939203262329, |
|
"logps/chosen": -209.16690063476562, |
|
"logps/rejected": -201.7328643798828, |
|
"loss": 0.666, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.042094189673662186, |
|
"rewards/margins": 0.05819466710090637, |
|
"rewards/margins_max": 0.08667898923158646, |
|
"rewards/margins_min": 0.029710358008742332, |
|
"rewards/margins_std": 0.040282897651195526, |
|
"rewards/rejected": -0.016100479289889336, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 5.4885520701146324e-08, |
|
"logits/chosen": -1.27875816822052, |
|
"logits/rejected": -0.9493977427482605, |
|
"logps/chosen": -214.4361572265625, |
|
"logps/rejected": -233.2643280029297, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.034729085862636566, |
|
"rewards/margins": 0.06826482713222504, |
|
"rewards/margins_max": 0.11228573322296143, |
|
"rewards/margins_min": 0.024243932217359543, |
|
"rewards/margins_std": 0.06225494667887688, |
|
"rewards/rejected": -0.03353574126958847, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 5.3178040774583236e-08, |
|
"logits/chosen": -1.4629589319229126, |
|
"logits/rejected": -0.9861122965812683, |
|
"logps/chosen": -280.67486572265625, |
|
"logps/rejected": -271.3564147949219, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03766489028930664, |
|
"rewards/margins": 0.06308640539646149, |
|
"rewards/margins_max": 0.08921506255865097, |
|
"rewards/margins_min": 0.036957744508981705, |
|
"rewards/margins_std": 0.03695150464773178, |
|
"rewards/rejected": -0.025421511381864548, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 5.149437595356901e-08, |
|
"logits/chosen": -1.3392517566680908, |
|
"logits/rejected": -0.9539203643798828, |
|
"logps/chosen": -244.0900421142578, |
|
"logps/rejected": -216.6325225830078, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03929731249809265, |
|
"rewards/margins": 0.05567679926753044, |
|
"rewards/margins_max": 0.08472796529531479, |
|
"rewards/margins_min": 0.026625623926520348, |
|
"rewards/margins_std": 0.041084565222263336, |
|
"rewards/rejected": -0.01637948676943779, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 4.9834729958540016e-08, |
|
"logits/chosen": -1.3185430765151978, |
|
"logits/rejected": -0.9537866711616516, |
|
"logps/chosen": -255.76937866210938, |
|
"logps/rejected": -173.42251586914062, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03467049077153206, |
|
"rewards/margins": 0.05125656723976135, |
|
"rewards/margins_max": 0.07643640786409378, |
|
"rewards/margins_min": 0.026076724752783775, |
|
"rewards/margins_std": 0.03560966998338699, |
|
"rewards/rejected": -0.016586078330874443, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 4.8199303603697614e-08, |
|
"logits/chosen": -1.4323641061782837, |
|
"logits/rejected": -1.1901360750198364, |
|
"logps/chosen": -212.28759765625, |
|
"logps/rejected": -251.69052124023438, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03305204585194588, |
|
"rewards/margins": 0.04768746346235275, |
|
"rewards/margins_max": 0.0705387219786644, |
|
"rewards/margins_min": 0.024836191907525063, |
|
"rewards/margins_std": 0.03231657296419144, |
|
"rewards/rejected": -0.014635416679084301, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 4.658829477270995e-08, |
|
"logits/chosen": -1.4831786155700684, |
|
"logits/rejected": -1.0595829486846924, |
|
"logps/chosen": -205.73196411132812, |
|
"logps/rejected": -281.29119873046875, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04150627925992012, |
|
"rewards/margins": 0.06586649268865585, |
|
"rewards/margins_max": 0.08460468798875809, |
|
"rewards/margins_min": 0.047128308564424515, |
|
"rewards/margins_std": 0.026499798521399498, |
|
"rewards/rejected": -0.02436022460460663, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 4.5001898394768336e-08, |
|
"logits/chosen": -1.4085218906402588, |
|
"logits/rejected": -1.1751958131790161, |
|
"logps/chosen": -211.86831665039062, |
|
"logps/rejected": -212.71908569335938, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02613511122763157, |
|
"rewards/margins": 0.05009372904896736, |
|
"rewards/margins_max": 0.07305373251438141, |
|
"rewards/margins_min": 0.027133729308843613, |
|
"rewards/margins_std": 0.03247034177184105, |
|
"rewards/rejected": -0.023958619683980942, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 4.3440306421001324e-08, |
|
"logits/chosen": -1.531702995300293, |
|
"logits/rejected": -1.2762770652770996, |
|
"logps/chosen": -264.6157531738281, |
|
"logps/rejected": -239.91134643554688, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.03299673646688461, |
|
"rewards/margins": 0.05226144194602966, |
|
"rewards/margins_max": 0.08147990703582764, |
|
"rewards/margins_min": 0.023042969405651093, |
|
"rewards/margins_std": 0.041321154683828354, |
|
"rewards/rejected": -0.019264699891209602, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 4.190370780124863e-08, |
|
"logits/chosen": -1.2897651195526123, |
|
"logits/rejected": -1.0072309970855713, |
|
"logps/chosen": -186.4278564453125, |
|
"logps/rejected": -243.1654815673828, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.032657403498888016, |
|
"rewards/margins": 0.049304358661174774, |
|
"rewards/margins_max": 0.07682739198207855, |
|
"rewards/margins_min": 0.021781327202916145, |
|
"rewards/margins_std": 0.038923438638448715, |
|
"rewards/rejected": -0.01664695516228676, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 4.0392288461199045e-08, |
|
"logits/chosen": -1.2460219860076904, |
|
"logits/rejected": -1.0387169122695923, |
|
"logps/chosen": -224.2480926513672, |
|
"logps/rejected": -217.1114959716797, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.033198267221450806, |
|
"rewards/margins": 0.061884719878435135, |
|
"rewards/margins_max": 0.09340154379606247, |
|
"rewards/margins_min": 0.0303678959608078, |
|
"rewards/margins_std": 0.04457152262330055, |
|
"rewards/rejected": -0.02868645451962948, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 3.8906231279893423e-08, |
|
"logits/chosen": -1.231979250907898, |
|
"logits/rejected": -1.0273730754852295, |
|
"logps/chosen": -233.99267578125, |
|
"logps/rejected": -187.70278930664062, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.02670123614370823, |
|
"rewards/margins": 0.05759245902299881, |
|
"rewards/margins_max": 0.09034743160009384, |
|
"rewards/margins_min": 0.024837475270032883, |
|
"rewards/margins_std": 0.0463225394487381, |
|
"rewards/rejected": -0.03089122101664543, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 3.74457160675965e-08, |
|
"logits/chosen": -1.3447935581207275, |
|
"logits/rejected": -1.003073811531067, |
|
"logps/chosen": -207.041015625, |
|
"logps/rejected": -198.29556274414062, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03199172765016556, |
|
"rewards/margins": 0.05442710965871811, |
|
"rewards/margins_max": 0.07237287610769272, |
|
"rewards/margins_min": 0.036481358110904694, |
|
"rewards/margins_std": 0.025379130616784096, |
|
"rewards/rejected": -0.02243538200855255, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 3.601091954404062e-08, |
|
"logits/chosen": -1.2016583681106567, |
|
"logits/rejected": -0.9326213002204895, |
|
"logps/chosen": -238.39126586914062, |
|
"logps/rejected": -243.9576416015625, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.026809915900230408, |
|
"rewards/margins": 0.04789675027132034, |
|
"rewards/margins_max": 0.07007952034473419, |
|
"rewards/margins_min": 0.025713974609971046, |
|
"rewards/margins_std": 0.03137117996811867, |
|
"rewards/rejected": -0.021086832508444786, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 3.460201531704263e-08, |
|
"logits/chosen": -1.3697774410247803, |
|
"logits/rejected": -0.8151613473892212, |
|
"logps/chosen": -393.69189453125, |
|
"logps/rejected": -246.65817260742188, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03834783285856247, |
|
"rewards/margins": 0.07543188333511353, |
|
"rewards/margins_max": 0.10231365263462067, |
|
"rewards/margins_min": 0.04855012148618698, |
|
"rewards/margins_std": 0.038016561418771744, |
|
"rewards/rejected": -0.037084050476551056, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 3.321917386149772e-08, |
|
"logits/chosen": -1.4533543586730957, |
|
"logits/rejected": -1.0557693243026733, |
|
"logps/chosen": -209.1657257080078, |
|
"logps/rejected": -214.769287109375, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.042614761739969254, |
|
"rewards/margins": 0.05421183258295059, |
|
"rewards/margins_max": 0.08117054402828217, |
|
"rewards/margins_min": 0.02725311741232872, |
|
"rewards/margins_std": 0.038125377148389816, |
|
"rewards/rejected": -0.011597072705626488, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 3.1862562498752354e-08, |
|
"logits/chosen": -1.4646778106689453, |
|
"logits/rejected": -1.1616142988204956, |
|
"logps/chosen": -192.743408203125, |
|
"logps/rejected": -208.6314697265625, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03919973596930504, |
|
"rewards/margins": 0.048980120569467545, |
|
"rewards/margins_max": 0.06728260964155197, |
|
"rewards/margins_min": 0.030677635222673416, |
|
"rewards/margins_std": 0.02588362991809845, |
|
"rewards/rejected": -0.009780386462807655, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 3.053234537635857e-08, |
|
"logits/chosen": -1.5152153968811035, |
|
"logits/rejected": -1.1075925827026367, |
|
"logps/chosen": -182.39224243164062, |
|
"logps/rejected": -248.19351196289062, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.038503944873809814, |
|
"rewards/margins": 0.06572575867176056, |
|
"rewards/margins_max": 0.09247289597988129, |
|
"rewards/margins_min": 0.03897860646247864, |
|
"rewards/margins_std": 0.03782618045806885, |
|
"rewards/rejected": -0.027221810072660446, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 2.922868344821236e-08, |
|
"logits/chosen": -1.3224998712539673, |
|
"logits/rejected": -0.881952166557312, |
|
"logps/chosen": -220.5806121826172, |
|
"logps/rejected": -189.16519165039062, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03971542418003082, |
|
"rewards/margins": 0.06064347177743912, |
|
"rewards/margins_max": 0.08912724256515503, |
|
"rewards/margins_min": 0.03215969726443291, |
|
"rewards/margins_std": 0.04028213769197464, |
|
"rewards/rejected": -0.020928047597408295, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 2.7951734455078786e-08, |
|
"logits/chosen": -1.4898918867111206, |
|
"logits/rejected": -0.9584072828292847, |
|
"logps/chosen": -253.1838836669922, |
|
"logps/rejected": -262.40740966796875, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.04760271683335304, |
|
"rewards/margins": 0.06493322551250458, |
|
"rewards/margins_max": 0.088630810379982, |
|
"rewards/margins_min": 0.041235629469156265, |
|
"rewards/margins_std": 0.03351346030831337, |
|
"rewards/rejected": -0.017330504953861237, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 2.670165290550544e-08, |
|
"logits/chosen": -1.386683464050293, |
|
"logits/rejected": -0.9467649459838867, |
|
"logps/chosen": -209.0247802734375, |
|
"logps/rejected": -213.07766723632812, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03524526581168175, |
|
"rewards/margins": 0.05571124702692032, |
|
"rewards/margins_max": 0.0755188837647438, |
|
"rewards/margins_min": 0.03590361401438713, |
|
"rewards/margins_std": 0.028012219816446304, |
|
"rewards/rejected": -0.02046598121523857, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 2.5478590057127268e-08, |
|
"logits/chosen": -1.4220774173736572, |
|
"logits/rejected": -1.0289338827133179, |
|
"logps/chosen": -211.04776000976562, |
|
"logps/rejected": -194.3659210205078, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03885051980614662, |
|
"rewards/margins": 0.06495725363492966, |
|
"rewards/margins_max": 0.09185833483934402, |
|
"rewards/margins_min": 0.038056183606386185, |
|
"rewards/margins_std": 0.03804386407136917, |
|
"rewards/rejected": -0.026106741279363632, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 2.4282693898364432e-08, |
|
"logits/chosen": -1.4226223230361938, |
|
"logits/rejected": -0.9696500897407532, |
|
"logps/chosen": -176.65994262695312, |
|
"logps/rejected": -180.49554443359375, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04054059833288193, |
|
"rewards/margins": 0.06953348219394684, |
|
"rewards/margins_max": 0.09578864276409149, |
|
"rewards/margins_min": 0.04327831417322159, |
|
"rewards/margins_std": 0.03713040426373482, |
|
"rewards/rejected": -0.02899288199841976, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 2.3114109130516424e-08, |
|
"logits/chosen": -1.3210171461105347, |
|
"logits/rejected": -0.9485718607902527, |
|
"logps/chosen": -182.39852905273438, |
|
"logps/rejected": -210.02572631835938, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03208126500248909, |
|
"rewards/margins": 0.06662876158952713, |
|
"rewards/margins_max": 0.09643807262182236, |
|
"rewards/margins_min": 0.036819443106651306, |
|
"rewards/margins_std": 0.04215674102306366, |
|
"rewards/rejected": -0.03454749658703804, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 2.1972977150253064e-08, |
|
"logits/chosen": -1.5038772821426392, |
|
"logits/rejected": -0.935627818107605, |
|
"logps/chosen": -247.6013641357422, |
|
"logps/rejected": -288.5367431640625, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.04114503413438797, |
|
"rewards/margins": 0.07020784169435501, |
|
"rewards/margins_max": 0.11474663019180298, |
|
"rewards/margins_min": 0.025669043883681297, |
|
"rewards/margins_std": 0.06298737227916718, |
|
"rewards/rejected": -0.02906280755996704, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 2.085943603250595e-08, |
|
"logits/chosen": -1.428411841392517, |
|
"logits/rejected": -1.1179401874542236, |
|
"logps/chosen": -189.0768585205078, |
|
"logps/rejected": -204.92483520507812, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.037042513489723206, |
|
"rewards/margins": 0.06584902107715607, |
|
"rewards/margins_max": 0.08738056570291519, |
|
"rewards/margins_min": 0.04431745409965515, |
|
"rewards/margins_std": 0.03045022115111351, |
|
"rewards/rejected": -0.028806498274207115, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.977362051376158e-08, |
|
"logits/chosen": -1.4192006587982178, |
|
"logits/rejected": -1.046197772026062, |
|
"logps/chosen": -207.2356414794922, |
|
"logps/rejected": -183.64010620117188, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.040088996291160583, |
|
"rewards/margins": 0.05597255378961563, |
|
"rewards/margins_max": 0.08285551518201828, |
|
"rewards/margins_min": 0.02908957563340664, |
|
"rewards/margins_std": 0.03801826387643814, |
|
"rewards/rejected": -0.01588355377316475, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 1.8715661975758524e-08, |
|
"logits/chosen": -1.2246617078781128, |
|
"logits/rejected": -1.0061366558074951, |
|
"logps/chosen": -167.17034912109375, |
|
"logps/rejected": -239.23721313476562, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03663500398397446, |
|
"rewards/margins": 0.065810427069664, |
|
"rewards/margins_max": 0.09848222881555557, |
|
"rewards/margins_min": 0.03313861042261124, |
|
"rewards/margins_std": 0.04620492085814476, |
|
"rewards/rejected": -0.029175419360399246, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 1.768568842959037e-08, |
|
"logits/chosen": -1.4292596578598022, |
|
"logits/rejected": -1.0080540180206299, |
|
"logps/chosen": -259.69537353515625, |
|
"logps/rejected": -236.2705078125, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03333950787782669, |
|
"rewards/margins": 0.06502407789230347, |
|
"rewards/margins_max": 0.08750364929437637, |
|
"rewards/margins_min": 0.04254449903964996, |
|
"rewards/margins_std": 0.03179091960191727, |
|
"rewards/rejected": -0.031684570014476776, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.668382450021666e-08, |
|
"logits/chosen": -1.3095591068267822, |
|
"logits/rejected": -1.0401207208633423, |
|
"logps/chosen": -206.1196746826172, |
|
"logps/rejected": -171.01002502441406, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03997686505317688, |
|
"rewards/margins": 0.05088004469871521, |
|
"rewards/margins_max": 0.07651884853839874, |
|
"rewards/margins_min": 0.025241252034902573, |
|
"rewards/margins_std": 0.03625873476266861, |
|
"rewards/rejected": -0.010903185233473778, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 1.571019141138366e-08, |
|
"logits/chosen": -1.3637133836746216, |
|
"logits/rejected": -1.0843619108200073, |
|
"logps/chosen": -171.24868774414062, |
|
"logps/rejected": -179.87950134277344, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0334320105612278, |
|
"rewards/margins": 0.048344530165195465, |
|
"rewards/margins_max": 0.06994569301605225, |
|
"rewards/margins_min": 0.026743358001112938, |
|
"rewards/margins_std": 0.030548665672540665, |
|
"rewards/rejected": -0.014912518672645092, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 1.4764906970956142e-08, |
|
"logits/chosen": -1.356999397277832, |
|
"logits/rejected": -1.0233064889907837, |
|
"logps/chosen": -193.38766479492188, |
|
"logps/rejected": -196.44386291503906, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03667105361819267, |
|
"rewards/margins": 0.05664552003145218, |
|
"rewards/margins_max": 0.07828361541032791, |
|
"rewards/margins_min": 0.03500741347670555, |
|
"rewards/margins_std": 0.03060089983046055, |
|
"rewards/rejected": -0.01997446082532406, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 1.3848085556663197e-08, |
|
"logits/chosen": -1.2966177463531494, |
|
"logits/rejected": -0.9208385348320007, |
|
"logps/chosen": -267.82086181640625, |
|
"logps/rejected": -202.51319885253906, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.034102655947208405, |
|
"rewards/margins": 0.05611242726445198, |
|
"rewards/margins_max": 0.08146383613348007, |
|
"rewards/margins_min": 0.030761009082198143, |
|
"rewards/margins_std": 0.03585231304168701, |
|
"rewards/rejected": -0.022009767591953278, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 1.2959838102258535e-08, |
|
"logits/chosen": -1.3745180368423462, |
|
"logits/rejected": -1.0097087621688843, |
|
"logps/chosen": -287.45062255859375, |
|
"logps/rejected": -254.785400390625, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03240882605314255, |
|
"rewards/margins": 0.05045477673411369, |
|
"rewards/margins_max": 0.07354002445936203, |
|
"rewards/margins_min": 0.027369529008865356, |
|
"rewards/margins_std": 0.032647471874952316, |
|
"rewards/rejected": -0.018045950680971146, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.2100272084097779e-08, |
|
"logits/chosen": -1.323025107383728, |
|
"logits/rejected": -1.0186015367507935, |
|
"logps/chosen": -183.8828582763672, |
|
"logps/rejected": -250.3448486328125, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0408027246594429, |
|
"rewards/margins": 0.07389940321445465, |
|
"rewards/margins_max": 0.10190100967884064, |
|
"rewards/margins_min": 0.04589778929948807, |
|
"rewards/margins_std": 0.03960026055574417, |
|
"rewards/rejected": -0.03309667855501175, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 1.1269491508133944e-08, |
|
"logits/chosen": -1.5226811170578003, |
|
"logits/rejected": -0.9228025674819946, |
|
"logps/chosen": -312.5696105957031, |
|
"logps/rejected": -221.99136352539062, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0407402329146862, |
|
"rewards/margins": 0.06274916976690292, |
|
"rewards/margins_max": 0.08748480677604675, |
|
"rewards/margins_min": 0.038013529032468796, |
|
"rewards/margins_std": 0.03498147428035736, |
|
"rewards/rejected": -0.02200893685221672, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 1.0467596897333008e-08, |
|
"logits/chosen": -1.3627498149871826, |
|
"logits/rejected": -0.8954145312309265, |
|
"logps/chosen": -231.42977905273438, |
|
"logps/rejected": -222.2968292236328, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04414510354399681, |
|
"rewards/margins": 0.06752908229827881, |
|
"rewards/margins_max": 0.09981563687324524, |
|
"rewards/margins_min": 0.03524252399802208, |
|
"rewards/margins_std": 0.04566008597612381, |
|
"rewards/rejected": -0.02338396944105625, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 9.694685279510672e-09, |
|
"logits/chosen": -1.3423680067062378, |
|
"logits/rejected": -1.2014684677124023, |
|
"logps/chosen": -185.5139617919922, |
|
"logps/rejected": -232.6142120361328, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.022014331072568893, |
|
"rewards/margins": 0.049973584711551666, |
|
"rewards/margins_max": 0.0689433366060257, |
|
"rewards/margins_min": 0.031003836542367935, |
|
"rewards/margins_std": 0.026827272027730942, |
|
"rewards/rejected": -0.027959251776337624, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.375, |
|
"learning_rate": 8.950850175592328e-09, |
|
"logits/chosen": -1.4081456661224365, |
|
"logits/rejected": -1.0961415767669678, |
|
"logps/chosen": -232.86813354492188, |
|
"logps/rejected": -269.9138488769531, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0401880256831646, |
|
"rewards/margins": 0.05734118074178696, |
|
"rewards/margins_max": 0.08120250701904297, |
|
"rewards/margins_min": 0.03347986191511154, |
|
"rewards/margins_std": 0.03374500200152397, |
|
"rewards/rejected": -0.01715315505862236, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 8.236181588297115e-09, |
|
"logits/chosen": -1.3293626308441162, |
|
"logits/rejected": -0.9906571507453918, |
|
"logps/chosen": -256.72100830078125, |
|
"logps/rejected": -313.48504638671875, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03118916228413582, |
|
"rewards/margins": 0.06911532580852509, |
|
"rewards/margins_max": 0.10093537718057632, |
|
"rewards/margins_min": 0.03729528561234474, |
|
"rewards/margins_std": 0.045000337064266205, |
|
"rewards/rejected": -0.037926167249679565, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.375, |
|
"learning_rate": 7.550765991247654e-09, |
|
"logits/chosen": -1.3571122884750366, |
|
"logits/rejected": -0.9799866676330566, |
|
"logps/chosen": -237.0681610107422, |
|
"logps/rejected": -217.8629913330078, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.039660923182964325, |
|
"rewards/margins": 0.058103930205106735, |
|
"rewards/margins_max": 0.08864767849445343, |
|
"rewards/margins_min": 0.02756018377840519, |
|
"rewards/margins_std": 0.04319537803530693, |
|
"rewards/rejected": -0.01844300702214241, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 6.894686318507064e-09, |
|
"logits/chosen": -1.3770530223846436, |
|
"logits/rejected": -1.0678465366363525, |
|
"logps/chosen": -207.478759765625, |
|
"logps/rejected": -254.6818389892578, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03722939267754555, |
|
"rewards/margins": 0.057834554463624954, |
|
"rewards/margins_max": 0.07993746548891068, |
|
"rewards/margins_min": 0.035731635987758636, |
|
"rewards/margins_std": 0.0312582366168499, |
|
"rewards/rejected": -0.020605161786079407, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 6.268021954544095e-09, |
|
"logits/chosen": -1.1451586484909058, |
|
"logits/rejected": -0.9856246709823608, |
|
"logps/chosen": -198.33804321289062, |
|
"logps/rejected": -290.0233459472656, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03236705809831619, |
|
"rewards/margins": 0.06491495668888092, |
|
"rewards/margins_max": 0.09603826701641083, |
|
"rewards/margins_min": 0.033791638910770416, |
|
"rewards/margins_std": 0.044015005230903625, |
|
"rewards/rejected": -0.03254788741469383, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 5.670848724627531e-09, |
|
"logits/chosen": -1.4588849544525146, |
|
"logits/rejected": -1.0730645656585693, |
|
"logps/chosen": -301.3870849609375, |
|
"logps/rejected": -199.2222442626953, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.026702869683504105, |
|
"rewards/margins": 0.04165857285261154, |
|
"rewards/margins_max": 0.06404221057891846, |
|
"rewards/margins_min": 0.01927492953836918, |
|
"rewards/margins_std": 0.03165525197982788, |
|
"rewards/rejected": -0.014955705031752586, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 5.103238885651617e-09, |
|
"logits/chosen": -1.4286754131317139, |
|
"logits/rejected": -0.9818390011787415, |
|
"logps/chosen": -238.0774688720703, |
|
"logps/rejected": -222.6654815673828, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03790941461920738, |
|
"rewards/margins": 0.06597913056612015, |
|
"rewards/margins_max": 0.09669280052185059, |
|
"rewards/margins_min": 0.03526546061038971, |
|
"rewards/margins_std": 0.04343568533658981, |
|
"rewards/rejected": -0.028069715946912766, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 4.565261117393249e-09, |
|
"logits/chosen": -1.527706503868103, |
|
"logits/rejected": -1.1605089902877808, |
|
"logps/chosen": -238.7028045654297, |
|
"logps/rejected": -198.54071044921875, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.038233425468206406, |
|
"rewards/margins": 0.05112460255622864, |
|
"rewards/margins_max": 0.072813980281353, |
|
"rewards/margins_min": 0.029435228556394577, |
|
"rewards/margins_std": 0.03067341446876526, |
|
"rewards/rejected": -0.01289118267595768, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 4.056980514201447e-09, |
|
"logits/chosen": -1.3091288805007935, |
|
"logits/rejected": -0.9673709869384766, |
|
"logps/chosen": -203.36215209960938, |
|
"logps/rejected": -215.65908813476562, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.038450248539447784, |
|
"rewards/margins": 0.06377027928829193, |
|
"rewards/margins_max": 0.09830651432275772, |
|
"rewards/margins_min": 0.0292340274900198, |
|
"rewards/margins_std": 0.048841629177331924, |
|
"rewards/rejected": -0.02532001957297325, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 3.5784585771215235e-09, |
|
"logits/chosen": -1.3335479497909546, |
|
"logits/rejected": -0.9828931093215942, |
|
"logps/chosen": -176.47000122070312, |
|
"logps/rejected": -178.46786499023438, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.031307101249694824, |
|
"rewards/margins": 0.04759521037340164, |
|
"rewards/margins_max": 0.0687338337302208, |
|
"rewards/margins_min": 0.02645658515393734, |
|
"rewards/margins_std": 0.02989453449845314, |
|
"rewards/rejected": -0.016288110986351967, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 3.129753206453201e-09, |
|
"logits/chosen": -1.4696094989776611, |
|
"logits/rejected": -1.032707929611206, |
|
"logps/chosen": -234.9283447265625, |
|
"logps/rejected": -236.0854949951172, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0398876890540123, |
|
"rewards/margins": 0.0536864697933197, |
|
"rewards/margins_max": 0.08105526119470596, |
|
"rewards/margins_min": 0.026317689567804337, |
|
"rewards/margins_std": 0.03870530426502228, |
|
"rewards/rejected": -0.013798783533275127, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 2.7109186947449348e-09, |
|
"logits/chosen": -1.4651210308074951, |
|
"logits/rejected": -1.179198980331421, |
|
"logps/chosen": -185.0526123046875, |
|
"logps/rejected": -206.34677124023438, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.037171076983213425, |
|
"rewards/margins": 0.04990251734852791, |
|
"rewards/margins_max": 0.07099257409572601, |
|
"rewards/margins_min": 0.0288124717772007, |
|
"rewards/margins_std": 0.029825836420059204, |
|
"rewards/rejected": -0.012731445021927357, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 2.322005720224618e-09, |
|
"logits/chosen": -1.2301725149154663, |
|
"logits/rejected": -0.8613675236701965, |
|
"logps/chosen": -176.6241912841797, |
|
"logps/rejected": -234.5286407470703, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.040943752974271774, |
|
"rewards/margins": 0.06620831787586212, |
|
"rewards/margins_max": 0.09054501354694366, |
|
"rewards/margins_min": 0.04187161475419998, |
|
"rewards/margins_std": 0.0344172939658165, |
|
"rewards/rejected": -0.02526455745100975, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.9630613406676764e-09, |
|
"logits/chosen": -1.3148514032363892, |
|
"logits/rejected": -1.1194158792495728, |
|
"logps/chosen": -204.06472778320312, |
|
"logps/rejected": -175.95155334472656, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.030322005972266197, |
|
"rewards/margins": 0.04166535660624504, |
|
"rewards/margins_max": 0.06148039177060127, |
|
"rewards/margins_min": 0.021850308403372765, |
|
"rewards/margins_std": 0.028022700920701027, |
|
"rewards/rejected": -0.011343345046043396, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 1.6341289877028486e-09, |
|
"logits/chosen": -1.2309526205062866, |
|
"logits/rejected": -0.9648950695991516, |
|
"logps/chosen": -221.1148223876953, |
|
"logps/rejected": -218.8831024169922, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03704274445772171, |
|
"rewards/margins": 0.06422804296016693, |
|
"rewards/margins_max": 0.09260173887014389, |
|
"rewards/margins_min": 0.03585432469844818, |
|
"rewards/margins_std": 0.040126487612724304, |
|
"rewards/rejected": -0.027185291051864624, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.33524846155747e-09, |
|
"logits/chosen": -1.5479004383087158, |
|
"logits/rejected": -1.124626874923706, |
|
"logps/chosen": -272.0228271484375, |
|
"logps/rejected": -232.5234832763672, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03827238082885742, |
|
"rewards/margins": 0.06339852511882782, |
|
"rewards/margins_max": 0.08984600752592087, |
|
"rewards/margins_min": 0.03695103898644447, |
|
"rewards/margins_std": 0.03740239515900612, |
|
"rewards/rejected": -0.0251261405646801, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 1.066455926241383e-09, |
|
"logits/chosen": -1.3203740119934082, |
|
"logits/rejected": -1.0223264694213867, |
|
"logps/chosen": -217.78921508789062, |
|
"logps/rejected": -185.77662658691406, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03560353443026543, |
|
"rewards/margins": 0.05814961716532707, |
|
"rewards/margins_max": 0.08662423491477966, |
|
"rewards/margins_min": 0.029674995690584183, |
|
"rewards/margins_std": 0.04026919603347778, |
|
"rewards/rejected": -0.022546080872416496, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.625, |
|
"learning_rate": 8.277839051712698e-10, |
|
"logits/chosen": -1.2869453430175781, |
|
"logits/rejected": -0.9400846362113953, |
|
"logps/chosen": -253.38711547851562, |
|
"logps/rejected": -252.90274047851562, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04124082997441292, |
|
"rewards/margins": 0.05371633172035217, |
|
"rewards/margins_max": 0.08102357387542725, |
|
"rewards/margins_min": 0.0264090858399868, |
|
"rewards/margins_std": 0.03861827403306961, |
|
"rewards/rejected": -0.012475499883294106, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 6.192612772354944e-10, |
|
"logits/chosen": -1.323472499847412, |
|
"logits/rejected": -0.9910783767700195, |
|
"logps/chosen": -250.5233917236328, |
|
"logps/rejected": -254.0393524169922, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.043138034641742706, |
|
"rewards/margins": 0.062495727092027664, |
|
"rewards/margins_max": 0.09117720276117325, |
|
"rewards/margins_min": 0.03381425514817238, |
|
"rewards/margins_std": 0.0405617319047451, |
|
"rewards/rejected": -0.019357692450284958, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 4.4091327329956465e-10, |
|
"logits/chosen": -1.3970682621002197, |
|
"logits/rejected": -1.0630197525024414, |
|
"logps/chosen": -187.95303344726562, |
|
"logps/rejected": -180.37051391601562, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04492129012942314, |
|
"rewards/margins": 0.0595441572368145, |
|
"rewards/margins_max": 0.08808682858943939, |
|
"rewards/margins_min": 0.03100150264799595, |
|
"rewards/margins_std": 0.04036542400717735, |
|
"rewards/rejected": -0.014622872695326805, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 2.927614731534356e-10, |
|
"logits/chosen": -1.3621008396148682, |
|
"logits/rejected": -1.0651832818984985, |
|
"logps/chosen": -214.0552520751953, |
|
"logps/rejected": -293.3019104003906, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03748806565999985, |
|
"rewards/margins": 0.06025733798742294, |
|
"rewards/margins_max": 0.08895647525787354, |
|
"rewards/margins_min": 0.031558211892843246, |
|
"rewards/margins_std": 0.04058670252561569, |
|
"rewards/rejected": -0.022769279778003693, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.7482380290034792e-10, |
|
"logits/chosen": -1.4978671073913574, |
|
"logits/rejected": -1.0491201877593994, |
|
"logps/chosen": -187.7884063720703, |
|
"logps/rejected": -193.33639526367188, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03827610984444618, |
|
"rewards/margins": 0.06368110328912735, |
|
"rewards/margins_max": 0.08939781039953232, |
|
"rewards/margins_min": 0.03796439617872238, |
|
"rewards/margins_std": 0.036368921399116516, |
|
"rewards/rejected": -0.025404995307326317, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 8.711453278778535e-11, |
|
"logits/chosen": -1.3394626379013062, |
|
"logits/rejected": -0.8948138356208801, |
|
"logps/chosen": -242.09231567382812, |
|
"logps/rejected": -217.08139038085938, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.035616446286439896, |
|
"rewards/margins": 0.06384526938199997, |
|
"rewards/margins_max": 0.09082364290952682, |
|
"rewards/margins_min": 0.03686688840389252, |
|
"rewards/margins_std": 0.03815319389104843, |
|
"rewards/rejected": -0.028228823095560074, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 2.9644275480772416e-11, |
|
"logits/chosen": -1.425526738166809, |
|
"logits/rejected": -1.098435640335083, |
|
"logps/chosen": -208.4182586669922, |
|
"logps/rejected": -194.59750366210938, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03818322345614433, |
|
"rewards/margins": 0.04908495396375656, |
|
"rewards/margins_max": 0.07002463191747665, |
|
"rewards/margins_min": 0.02814526855945587, |
|
"rewards/margins_std": 0.029613185673952103, |
|
"rewards/rejected": -0.010901734232902527, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 2.419984777790596e-12, |
|
"logits/chosen": -1.3360934257507324, |
|
"logits/rejected": -0.8945194482803345, |
|
"logps/chosen": -228.0156707763672, |
|
"logps/rejected": -237.915283203125, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.045156557112932205, |
|
"rewards/margins": 0.06454546749591827, |
|
"rewards/margins_max": 0.10190453380346298, |
|
"rewards/margins_min": 0.02718639373779297, |
|
"rewards/margins_std": 0.05283369496464729, |
|
"rewards/rejected": -0.01938890479505062, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -1.0169051885604858, |
|
"eval_logits/rejected": -0.8946173191070557, |
|
"eval_logps/chosen": -322.6468811035156, |
|
"eval_logps/rejected": -313.6658020019531, |
|
"eval_loss": 0.6918271780014038, |
|
"eval_rewards/accuracies": 0.5540000200271606, |
|
"eval_rewards/chosen": 0.02316886931657791, |
|
"eval_rewards/margins": 0.0031846188940107822, |
|
"eval_rewards/margins_max": 0.06275644898414612, |
|
"eval_rewards/margins_min": -0.059831298887729645, |
|
"eval_rewards/margins_std": 0.040721021592617035, |
|
"eval_rewards/rejected": 0.019984247162938118, |
|
"eval_runtime": 1444.6396, |
|
"eval_samples_per_second": 2.769, |
|
"eval_steps_per_second": 0.173, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3174, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6703614967006065, |
|
"train_runtime": 26793.455, |
|
"train_samples_per_second": 0.948, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3174, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|