|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9992254066615027, |
|
"eval_steps": 100, |
|
"global_step": 726, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.84931506849315e-09, |
|
"logits/chosen": -2.3569769859313965, |
|
"logits/rejected": -2.397932529449463, |
|
"logps/chosen": -275.3341369628906, |
|
"logps/rejected": -209.60626220703125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.84931506849315e-08, |
|
"logits/chosen": -2.4118523597717285, |
|
"logits/rejected": -2.3359429836273193, |
|
"logps/chosen": -296.8043212890625, |
|
"logps/rejected": -226.9486541748047, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.4618055522441864, |
|
"rewards/chosen": -0.0005276877782307565, |
|
"rewards/margins": -0.0006244900869205594, |
|
"rewards/rejected": 9.680193034000695e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.36986301369863e-07, |
|
"logits/chosen": -2.4295315742492676, |
|
"logits/rejected": -2.379143714904785, |
|
"logps/chosen": -281.6752014160156, |
|
"logps/rejected": -217.3535919189453, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": 0.0024631484411656857, |
|
"rewards/margins": 5.021132437832421e-06, |
|
"rewards/rejected": 0.0024581279139965773, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.054794520547945e-07, |
|
"logits/chosen": -2.385784149169922, |
|
"logits/rejected": -2.3432505130767822, |
|
"logps/chosen": -255.5972442626953, |
|
"logps/rejected": -208.0276336669922, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.0008739754557609558, |
|
"rewards/margins": 0.002002383815124631, |
|
"rewards/rejected": -0.0011284081265330315, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.73972602739726e-07, |
|
"logits/chosen": -2.449314832687378, |
|
"logits/rejected": -2.3985402584075928, |
|
"logps/chosen": -286.9411926269531, |
|
"logps/rejected": -217.51119995117188, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": 0.0011719572357833385, |
|
"rewards/margins": 0.002339401515200734, |
|
"rewards/rejected": -0.0011674443958327174, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.424657534246575e-07, |
|
"logits/chosen": -2.4232876300811768, |
|
"logits/rejected": -2.3819985389709473, |
|
"logps/chosen": -270.04620361328125, |
|
"logps/rejected": -224.44857788085938, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0045755826868116856, |
|
"rewards/margins": 0.007746423594653606, |
|
"rewards/rejected": -0.003170841606333852, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.10958904109589e-07, |
|
"logits/chosen": -2.406489133834839, |
|
"logits/rejected": -2.376723289489746, |
|
"logps/chosen": -269.7476501464844, |
|
"logps/rejected": -216.1673583984375, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5609375238418579, |
|
"rewards/chosen": 0.00798078440129757, |
|
"rewards/margins": 0.009231673553586006, |
|
"rewards/rejected": -0.0012508893851190805, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.794520547945205e-07, |
|
"logits/chosen": -2.3951592445373535, |
|
"logits/rejected": -2.3819260597229004, |
|
"logps/chosen": -257.29620361328125, |
|
"logps/rejected": -215.2325897216797, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5609375238418579, |
|
"rewards/chosen": 0.012629570439457893, |
|
"rewards/margins": 0.013305542059242725, |
|
"rewards/rejected": -0.0006759737734682858, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.946401225114854e-07, |
|
"logits/chosen": -2.4296867847442627, |
|
"logits/rejected": -2.368792772293091, |
|
"logps/chosen": -267.8879699707031, |
|
"logps/rejected": -219.56350708007812, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": 0.019007008522748947, |
|
"rewards/margins": 0.0214321780949831, |
|
"rewards/rejected": -0.002425167942419648, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.869831546707504e-07, |
|
"logits/chosen": -2.463676929473877, |
|
"logits/rejected": -2.3972015380859375, |
|
"logps/chosen": -274.6397705078125, |
|
"logps/rejected": -225.1620635986328, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": 0.024952612817287445, |
|
"rewards/margins": 0.02081022970378399, |
|
"rewards/rejected": 0.004142382647842169, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.793261868300153e-07, |
|
"logits/chosen": -2.4068779945373535, |
|
"logits/rejected": -2.375128746032715, |
|
"logps/chosen": -276.1687927246094, |
|
"logps/rejected": -228.1005096435547, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.660937488079071, |
|
"rewards/chosen": 0.03607472777366638, |
|
"rewards/margins": 0.034400396049022675, |
|
"rewards/rejected": 0.001674329163506627, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7166921898928023e-07, |
|
"logits/chosen": -2.445211887359619, |
|
"logits/rejected": -2.3825695514678955, |
|
"logps/chosen": -254.85311889648438, |
|
"logps/rejected": -222.86117553710938, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": 0.034680236130952835, |
|
"rewards/margins": 0.035091597586870193, |
|
"rewards/rejected": -0.00041136034997180104, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.640122511485451e-07, |
|
"logits/chosen": -2.412264585494995, |
|
"logits/rejected": -2.3916800022125244, |
|
"logps/chosen": -259.5788879394531, |
|
"logps/rejected": -220.30831909179688, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.6421874761581421, |
|
"rewards/chosen": 0.04985843971371651, |
|
"rewards/margins": 0.050119031220674515, |
|
"rewards/rejected": -0.00026059610536322, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.563552833078101e-07, |
|
"logits/chosen": -2.4411113262176514, |
|
"logits/rejected": -2.376892566680908, |
|
"logps/chosen": -268.37335205078125, |
|
"logps/rejected": -226.0688934326172, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.05422825738787651, |
|
"rewards/margins": 0.05518989637494087, |
|
"rewards/rejected": -0.0009616309544071555, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4869831546707505e-07, |
|
"logits/chosen": -2.448073387145996, |
|
"logits/rejected": -2.3861663341522217, |
|
"logps/chosen": -273.23272705078125, |
|
"logps/rejected": -230.17776489257812, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.06362518668174744, |
|
"rewards/margins": 0.06625331938266754, |
|
"rewards/rejected": -0.002628129906952381, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4104134762633994e-07, |
|
"logits/chosen": -2.480207920074463, |
|
"logits/rejected": -2.4179327487945557, |
|
"logps/chosen": -268.4297790527344, |
|
"logps/rejected": -226.82815551757812, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.07131167501211166, |
|
"rewards/margins": 0.0752154216170311, |
|
"rewards/rejected": -0.0039037547539919615, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.333843797856049e-07, |
|
"logits/chosen": -2.438366651535034, |
|
"logits/rejected": -2.3843159675598145, |
|
"logps/chosen": -281.81756591796875, |
|
"logps/rejected": -239.0724334716797, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": 0.08467759937047958, |
|
"rewards/margins": 0.08220230042934418, |
|
"rewards/rejected": 0.002475299406796694, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.257274119448698e-07, |
|
"logits/chosen": -2.4684793949127197, |
|
"logits/rejected": -2.433481216430664, |
|
"logps/chosen": -272.9942932128906, |
|
"logps/rejected": -239.0500946044922, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.6859375238418579, |
|
"rewards/chosen": 0.08272445946931839, |
|
"rewards/margins": 0.08306626230478287, |
|
"rewards/rejected": -0.00034180469810962677, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.180704441041347e-07, |
|
"logits/chosen": -2.429438829421997, |
|
"logits/rejected": -2.378554105758667, |
|
"logps/chosen": -259.08294677734375, |
|
"logps/rejected": -225.7974853515625, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.08147875219583511, |
|
"rewards/margins": 0.07715155184268951, |
|
"rewards/rejected": 0.004327205941081047, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1041347626339966e-07, |
|
"logits/chosen": -2.4421346187591553, |
|
"logits/rejected": -2.3960843086242676, |
|
"logps/chosen": -263.78216552734375, |
|
"logps/rejected": -212.2428436279297, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.09069393575191498, |
|
"rewards/margins": 0.1026170402765274, |
|
"rewards/rejected": -0.011923106387257576, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.027565084226646e-07, |
|
"logits/chosen": -2.463709831237793, |
|
"logits/rejected": -2.404499053955078, |
|
"logps/chosen": -264.03118896484375, |
|
"logps/rejected": -217.3473358154297, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.10673630237579346, |
|
"rewards/margins": 0.1264755129814148, |
|
"rewards/rejected": -0.019739216193556786, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9509954058192954e-07, |
|
"logits/chosen": -2.458402633666992, |
|
"logits/rejected": -2.4066162109375, |
|
"logps/chosen": -286.2770690917969, |
|
"logps/rejected": -230.6191864013672, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.10997898876667023, |
|
"rewards/margins": 0.12333294004201889, |
|
"rewards/rejected": -0.013353955931961536, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.874425727411945e-07, |
|
"logits/chosen": -2.419983386993408, |
|
"logits/rejected": -2.386007308959961, |
|
"logps/chosen": -278.9669494628906, |
|
"logps/rejected": -227.8882293701172, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.11453185975551605, |
|
"rewards/margins": 0.13373538851737976, |
|
"rewards/rejected": -0.01920352131128311, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797856049004594e-07, |
|
"logits/chosen": -2.4336562156677246, |
|
"logits/rejected": -2.36842679977417, |
|
"logps/chosen": -266.24053955078125, |
|
"logps/rejected": -227.7322235107422, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.12494595348834991, |
|
"rewards/margins": 0.14494453370571136, |
|
"rewards/rejected": -0.019998596981167793, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7212863705972436e-07, |
|
"logits/chosen": -2.45171856880188, |
|
"logits/rejected": -2.4028737545013428, |
|
"logps/chosen": -273.0599365234375, |
|
"logps/rejected": -221.7194366455078, |
|
"loss": 0.6313, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.13546457886695862, |
|
"rewards/margins": 0.1584644615650177, |
|
"rewards/rejected": -0.02299986407160759, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.1062474250793457, |
|
"eval_logits/rejected": -1.9863277673721313, |
|
"eval_logps/chosen": -266.3520812988281, |
|
"eval_logps/rejected": -220.30364990234375, |
|
"eval_loss": 0.6318375468254089, |
|
"eval_rewards/accuracies": 0.6600000262260437, |
|
"eval_rewards/chosen": 0.12281632423400879, |
|
"eval_rewards/margins": 0.1532445251941681, |
|
"eval_rewards/rejected": -0.030428189784288406, |
|
"eval_runtime": 170.6685, |
|
"eval_samples_per_second": 11.719, |
|
"eval_steps_per_second": 0.732, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6447166921898925e-07, |
|
"logits/chosen": -2.3959295749664307, |
|
"logits/rejected": -2.3386054039001465, |
|
"logps/chosen": -259.109619140625, |
|
"logps/rejected": -209.0337677001953, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.12544557452201843, |
|
"rewards/margins": 0.1500503271818161, |
|
"rewards/rejected": -0.024604763835668564, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.568147013782542e-07, |
|
"logits/chosen": -2.4076590538024902, |
|
"logits/rejected": -2.3606529235839844, |
|
"logps/chosen": -263.9520568847656, |
|
"logps/rejected": -225.4473876953125, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.1394219994544983, |
|
"rewards/margins": 0.1773197054862976, |
|
"rewards/rejected": -0.03789771348237991, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4915773353751913e-07, |
|
"logits/chosen": -2.417232036590576, |
|
"logits/rejected": -2.346893072128296, |
|
"logps/chosen": -280.55291748046875, |
|
"logps/rejected": -234.11996459960938, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": 0.1420687586069107, |
|
"rewards/margins": 0.20756664872169495, |
|
"rewards/rejected": -0.06549788266420364, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.41500765696784e-07, |
|
"logits/chosen": -2.4278924465179443, |
|
"logits/rejected": -2.3850674629211426, |
|
"logps/chosen": -253.51803588867188, |
|
"logps/rejected": -224.95968627929688, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": 0.13101010024547577, |
|
"rewards/margins": 0.16972467303276062, |
|
"rewards/rejected": -0.03871458023786545, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.33843797856049e-07, |
|
"logits/chosen": -2.3946681022644043, |
|
"logits/rejected": -2.380558967590332, |
|
"logps/chosen": -284.79608154296875, |
|
"logps/rejected": -226.66262817382812, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.13935603201389313, |
|
"rewards/margins": 0.20914654433727264, |
|
"rewards/rejected": -0.06979051232337952, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2618683001531396e-07, |
|
"logits/chosen": -2.3978686332702637, |
|
"logits/rejected": -2.3249762058258057, |
|
"logps/chosen": -263.2580871582031, |
|
"logps/rejected": -226.63101196289062, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.14139556884765625, |
|
"rewards/margins": 0.1872793436050415, |
|
"rewards/rejected": -0.04588378220796585, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1852986217457885e-07, |
|
"logits/chosen": -2.4475905895233154, |
|
"logits/rejected": -2.363696575164795, |
|
"logps/chosen": -262.8795471191406, |
|
"logps/rejected": -227.06930541992188, |
|
"loss": 0.619, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": 0.15051104128360748, |
|
"rewards/margins": 0.1968255192041397, |
|
"rewards/rejected": -0.04631447046995163, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.108728943338438e-07, |
|
"logits/chosen": -2.438253164291382, |
|
"logits/rejected": -2.421774387359619, |
|
"logps/chosen": -253.95639038085938, |
|
"logps/rejected": -236.4005584716797, |
|
"loss": 0.6208, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.13959024846553802, |
|
"rewards/margins": 0.1706792563199997, |
|
"rewards/rejected": -0.031088998541235924, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0321592649310873e-07, |
|
"logits/chosen": -2.4183051586151123, |
|
"logits/rejected": -2.339179039001465, |
|
"logps/chosen": -254.692626953125, |
|
"logps/rejected": -222.8722381591797, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1566968411207199, |
|
"rewards/margins": 0.19593168795108795, |
|
"rewards/rejected": -0.03923482820391655, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.955589586523736e-07, |
|
"logits/chosen": -2.4377613067626953, |
|
"logits/rejected": -2.366072177886963, |
|
"logps/chosen": -267.422607421875, |
|
"logps/rejected": -224.8284149169922, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": 0.14487192034721375, |
|
"rewards/margins": 0.2071552723646164, |
|
"rewards/rejected": -0.06228336691856384, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8790199081163856e-07, |
|
"logits/chosen": -2.435045003890991, |
|
"logits/rejected": -2.3581719398498535, |
|
"logps/chosen": -267.0567626953125, |
|
"logps/rejected": -223.5650177001953, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": 0.1428806185722351, |
|
"rewards/margins": 0.20400968194007874, |
|
"rewards/rejected": -0.06112906336784363, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.802450229709035e-07, |
|
"logits/chosen": -2.3934175968170166, |
|
"logits/rejected": -2.3315463066101074, |
|
"logps/chosen": -256.1235656738281, |
|
"logps/rejected": -211.8139190673828, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.14465923607349396, |
|
"rewards/margins": 0.20695683360099792, |
|
"rewards/rejected": -0.06229761987924576, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.725880551301684e-07, |
|
"logits/chosen": -2.4487271308898926, |
|
"logits/rejected": -2.3933699131011963, |
|
"logps/chosen": -275.8642578125, |
|
"logps/rejected": -231.2057342529297, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.14190760254859924, |
|
"rewards/margins": 0.22471928596496582, |
|
"rewards/rejected": -0.08281168341636658, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.649310872894334e-07, |
|
"logits/chosen": -2.4377894401550293, |
|
"logits/rejected": -2.406348466873169, |
|
"logps/chosen": -276.6672058105469, |
|
"logps/rejected": -221.1224365234375, |
|
"loss": 0.603, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.15706932544708252, |
|
"rewards/margins": 0.24053001403808594, |
|
"rewards/rejected": -0.08346069604158401, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.572741194486983e-07, |
|
"logits/chosen": -2.4386019706726074, |
|
"logits/rejected": -2.3754172325134277, |
|
"logps/chosen": -274.6893005371094, |
|
"logps/rejected": -206.1323699951172, |
|
"loss": 0.6004, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.167318657040596, |
|
"rewards/margins": 0.25797349214553833, |
|
"rewards/rejected": -0.09065485745668411, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.496171516079632e-07, |
|
"logits/chosen": -2.4488420486450195, |
|
"logits/rejected": -2.3803772926330566, |
|
"logps/chosen": -294.0111389160156, |
|
"logps/rejected": -237.24282836914062, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": 0.17194847762584686, |
|
"rewards/margins": 0.2693827748298645, |
|
"rewards/rejected": -0.09743430465459824, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4196018376722816e-07, |
|
"logits/chosen": -2.413655996322632, |
|
"logits/rejected": -2.38279390335083, |
|
"logps/chosen": -279.74090576171875, |
|
"logps/rejected": -226.3095245361328, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": 0.16821043193340302, |
|
"rewards/margins": 0.26299187541007996, |
|
"rewards/rejected": -0.09478144347667694, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.343032159264931e-07, |
|
"logits/chosen": -2.390028476715088, |
|
"logits/rejected": -2.3162546157836914, |
|
"logps/chosen": -270.34320068359375, |
|
"logps/rejected": -221.6541290283203, |
|
"loss": 0.6019, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.17296920716762543, |
|
"rewards/margins": 0.2894430458545685, |
|
"rewards/rejected": -0.11647380888462067, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.26646248085758e-07, |
|
"logits/chosen": -2.3680365085601807, |
|
"logits/rejected": -2.3160338401794434, |
|
"logps/chosen": -260.14593505859375, |
|
"logps/rejected": -214.6702423095703, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.16743981838226318, |
|
"rewards/margins": 0.2915259897708893, |
|
"rewards/rejected": -0.1240861564874649, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1898928024502298e-07, |
|
"logits/chosen": -2.443164825439453, |
|
"logits/rejected": -2.3816096782684326, |
|
"logps/chosen": -264.55914306640625, |
|
"logps/rejected": -219.47341918945312, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.15291035175323486, |
|
"rewards/margins": 0.2545214593410492, |
|
"rewards/rejected": -0.10161112248897552, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.113323124042879e-07, |
|
"logits/chosen": -2.416532039642334, |
|
"logits/rejected": -2.3757832050323486, |
|
"logps/chosen": -272.0744934082031, |
|
"logps/rejected": -228.9375, |
|
"loss": 0.5995, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": 0.17259187996387482, |
|
"rewards/margins": 0.2831020951271057, |
|
"rewards/rejected": -0.1105102151632309, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.036753445635528e-07, |
|
"logits/chosen": -2.4263906478881836, |
|
"logits/rejected": -2.4025325775146484, |
|
"logps/chosen": -284.97576904296875, |
|
"logps/rejected": -235.3527069091797, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.16448193788528442, |
|
"rewards/margins": 0.32010418176651, |
|
"rewards/rejected": -0.1556222140789032, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9601837672281775e-07, |
|
"logits/chosen": -2.402890920639038, |
|
"logits/rejected": -2.3698153495788574, |
|
"logps/chosen": -271.0093688964844, |
|
"logps/rejected": -234.02481079101562, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.14684638381004333, |
|
"rewards/margins": 0.2707447409629822, |
|
"rewards/rejected": -0.12389836460351944, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.883614088820827e-07, |
|
"logits/chosen": -2.425776958465576, |
|
"logits/rejected": -2.371450901031494, |
|
"logps/chosen": -269.1894226074219, |
|
"logps/rejected": -222.6921844482422, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": 0.1555326133966446, |
|
"rewards/margins": 0.28726813197135925, |
|
"rewards/rejected": -0.13173556327819824, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.09140682220459, |
|
"eval_logits/rejected": -1.9723255634307861, |
|
"eval_logps/chosen": -266.0958557128906, |
|
"eval_logps/rejected": -221.3338165283203, |
|
"eval_loss": 0.598257839679718, |
|
"eval_rewards/accuracies": 0.6759999990463257, |
|
"eval_rewards/chosen": 0.14844101667404175, |
|
"eval_rewards/margins": 0.28188496828079224, |
|
"eval_rewards/rejected": -0.1334439367055893, |
|
"eval_runtime": 171.7034, |
|
"eval_samples_per_second": 11.648, |
|
"eval_steps_per_second": 0.728, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.807044410413476e-07, |
|
"logits/chosen": -2.3953592777252197, |
|
"logits/rejected": -2.363999843597412, |
|
"logps/chosen": -258.8542175292969, |
|
"logps/rejected": -233.6983642578125, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": 0.1408676654100418, |
|
"rewards/margins": 0.27908438444137573, |
|
"rewards/rejected": -0.13821670413017273, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7304747320061255e-07, |
|
"logits/chosen": -2.3902556896209717, |
|
"logits/rejected": -2.3884072303771973, |
|
"logps/chosen": -265.0683288574219, |
|
"logps/rejected": -228.423828125, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.17755301296710968, |
|
"rewards/margins": 0.3005513548851013, |
|
"rewards/rejected": -0.12299831956624985, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6539050535987747e-07, |
|
"logits/chosen": -2.473872661590576, |
|
"logits/rejected": -2.3617444038391113, |
|
"logps/chosen": -270.9250793457031, |
|
"logps/rejected": -222.18264770507812, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": 0.16197429597377777, |
|
"rewards/margins": 0.3233007788658142, |
|
"rewards/rejected": -0.16132643818855286, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5773353751914243e-07, |
|
"logits/chosen": -2.4702870845794678, |
|
"logits/rejected": -2.4053397178649902, |
|
"logps/chosen": -285.6563720703125, |
|
"logps/rejected": -227.668212890625, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": 0.1673307716846466, |
|
"rewards/margins": 0.3238461911678314, |
|
"rewards/rejected": -0.15651538968086243, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5007656967840735e-07, |
|
"logits/chosen": -2.381838798522949, |
|
"logits/rejected": -2.3299927711486816, |
|
"logps/chosen": -254.9144287109375, |
|
"logps/rejected": -228.2997589111328, |
|
"loss": 0.5935, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.16740316152572632, |
|
"rewards/margins": 0.29852622747421265, |
|
"rewards/rejected": -0.13112305104732513, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4241960183767226e-07, |
|
"logits/chosen": -2.4169716835021973, |
|
"logits/rejected": -2.3592681884765625, |
|
"logps/chosen": -283.3446350097656, |
|
"logps/rejected": -233.24679565429688, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.1610698103904724, |
|
"rewards/margins": 0.3080851435661316, |
|
"rewards/rejected": -0.1470153033733368, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.347626339969372e-07, |
|
"logits/chosen": -2.408930540084839, |
|
"logits/rejected": -2.37349009513855, |
|
"logps/chosen": -271.91973876953125, |
|
"logps/rejected": -240.18679809570312, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.17847296595573425, |
|
"rewards/margins": 0.29651308059692383, |
|
"rewards/rejected": -0.118040069937706, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2710566615620215e-07, |
|
"logits/chosen": -2.367671489715576, |
|
"logits/rejected": -2.320443868637085, |
|
"logps/chosen": -275.58294677734375, |
|
"logps/rejected": -222.78842163085938, |
|
"loss": 0.5862, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.17732994258403778, |
|
"rewards/margins": 0.3325561583042145, |
|
"rewards/rejected": -0.1552262306213379, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1944869831546706e-07, |
|
"logits/chosen": -2.388644218444824, |
|
"logits/rejected": -2.3782949447631836, |
|
"logps/chosen": -268.08172607421875, |
|
"logps/rejected": -236.77163696289062, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.1551700383424759, |
|
"rewards/margins": 0.3083476424217224, |
|
"rewards/rejected": -0.15317757427692413, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.11791730474732e-07, |
|
"logits/chosen": -2.398094654083252, |
|
"logits/rejected": -2.3552727699279785, |
|
"logps/chosen": -263.4698181152344, |
|
"logps/rejected": -223.05538940429688, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.16049396991729736, |
|
"rewards/margins": 0.35638219118118286, |
|
"rewards/rejected": -0.1958882361650467, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0413476263399694e-07, |
|
"logits/chosen": -2.4469425678253174, |
|
"logits/rejected": -2.392503261566162, |
|
"logps/chosen": -270.57635498046875, |
|
"logps/rejected": -216.8152618408203, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.15162338316440582, |
|
"rewards/margins": 0.3167043924331665, |
|
"rewards/rejected": -0.1650809943675995, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.647779479326186e-08, |
|
"logits/chosen": -2.3848066329956055, |
|
"logits/rejected": -2.3738114833831787, |
|
"logps/chosen": -251.4600372314453, |
|
"logps/rejected": -222.6741180419922, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": 0.16963472962379456, |
|
"rewards/margins": 0.2706124782562256, |
|
"rewards/rejected": -0.10097774118185043, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.88208269525268e-08, |
|
"logits/chosen": -2.377875804901123, |
|
"logits/rejected": -2.374070644378662, |
|
"logps/chosen": -261.51788330078125, |
|
"logps/rejected": -221.13034057617188, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": 0.1429089605808258, |
|
"rewards/margins": 0.29002851247787476, |
|
"rewards/rejected": -0.14711955189704895, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.116385911179173e-08, |
|
"logits/chosen": -2.3656888008117676, |
|
"logits/rejected": -2.3523547649383545, |
|
"logps/chosen": -281.01617431640625, |
|
"logps/rejected": -217.3480682373047, |
|
"loss": 0.5924, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.17781397700309753, |
|
"rewards/margins": 0.3453444540500641, |
|
"rewards/rejected": -0.16753047704696655, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.350689127105667e-08, |
|
"logits/chosen": -2.4372496604919434, |
|
"logits/rejected": -2.3591084480285645, |
|
"logps/chosen": -273.20965576171875, |
|
"logps/rejected": -229.5889434814453, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": 0.17219647765159607, |
|
"rewards/margins": 0.35740959644317627, |
|
"rewards/rejected": -0.185213103890419, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.584992343032159e-08, |
|
"logits/chosen": -2.351811647415161, |
|
"logits/rejected": -2.3350961208343506, |
|
"logps/chosen": -263.6435241699219, |
|
"logps/rejected": -226.4728240966797, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.682812511920929, |
|
"rewards/chosen": 0.15416522324085236, |
|
"rewards/margins": 0.29506421089172363, |
|
"rewards/rejected": -0.14089898765087128, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.819295558958652e-08, |
|
"logits/chosen": -2.390491008758545, |
|
"logits/rejected": -2.3410990238189697, |
|
"logps/chosen": -289.5431823730469, |
|
"logps/rejected": -219.86788940429688, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": 0.18201851844787598, |
|
"rewards/margins": 0.38538652658462524, |
|
"rewards/rejected": -0.20336803793907166, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.0535987748851455e-08, |
|
"logits/chosen": -2.449439525604248, |
|
"logits/rejected": -2.3970067501068115, |
|
"logps/chosen": -276.04620361328125, |
|
"logps/rejected": -229.76620483398438, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.15049387514591217, |
|
"rewards/margins": 0.32289570569992065, |
|
"rewards/rejected": -0.17240183055400848, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.287901990811638e-08, |
|
"logits/chosen": -2.436511993408203, |
|
"logits/rejected": -2.341296672821045, |
|
"logps/chosen": -269.1939392089844, |
|
"logps/rejected": -242.32968139648438, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.16247320175170898, |
|
"rewards/margins": 0.3472925126552582, |
|
"rewards/rejected": -0.1848193258047104, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.522205206738132e-08, |
|
"logits/chosen": -2.3986496925354004, |
|
"logits/rejected": -2.3604884147644043, |
|
"logps/chosen": -276.03436279296875, |
|
"logps/rejected": -227.72573852539062, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.16009405255317688, |
|
"rewards/margins": 0.34014397859573364, |
|
"rewards/rejected": -0.18004995584487915, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7565084226646246e-08, |
|
"logits/chosen": -2.375347137451172, |
|
"logits/rejected": -2.3558340072631836, |
|
"logps/chosen": -265.17486572265625, |
|
"logps/rejected": -234.3466796875, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.16287991404533386, |
|
"rewards/margins": 0.35990676283836365, |
|
"rewards/rejected": -0.19702686369419098, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9908116385911178e-08, |
|
"logits/chosen": -2.4065492153167725, |
|
"logits/rejected": -2.36779522895813, |
|
"logps/chosen": -261.1010437011719, |
|
"logps/rejected": -224.9178009033203, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.6546875238418579, |
|
"rewards/chosen": 0.15390679240226746, |
|
"rewards/margins": 0.2883544862270355, |
|
"rewards/rejected": -0.13444769382476807, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.225114854517611e-08, |
|
"logits/chosen": -2.433964252471924, |
|
"logits/rejected": -2.3655142784118652, |
|
"logps/chosen": -281.37139892578125, |
|
"logps/rejected": -219.88296508789062, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.1487404853105545, |
|
"rewards/margins": 0.35009315609931946, |
|
"rewards/rejected": -0.20135268568992615, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.594180704441042e-09, |
|
"logits/chosen": -2.3966562747955322, |
|
"logits/rejected": -2.368652820587158, |
|
"logps/chosen": -258.764892578125, |
|
"logps/rejected": -223.00405883789062, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": 0.14300301671028137, |
|
"rewards/margins": 0.3152904212474823, |
|
"rewards/rejected": -0.17228738963603973, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.085902214050293, |
|
"eval_logits/rejected": -1.9675065279006958, |
|
"eval_logps/chosen": -266.14141845703125, |
|
"eval_logps/rejected": -221.83285522460938, |
|
"eval_loss": 0.5895045399665833, |
|
"eval_rewards/accuracies": 0.6880000233650208, |
|
"eval_rewards/chosen": 0.14388784766197205, |
|
"eval_rewards/margins": 0.3272360563278198, |
|
"eval_rewards/rejected": -0.1833481788635254, |
|
"eval_runtime": 170.0, |
|
"eval_samples_per_second": 11.765, |
|
"eval_steps_per_second": 0.735, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 726, |
|
"total_flos": 0.0, |
|
"train_loss": 0.622471270236102, |
|
"train_runtime": 20371.8366, |
|
"train_samples_per_second": 9.125, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 726, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|