|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 53.52218298444476, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.0146243572235107, |
|
"logits/rejected": -0.9850981831550598, |
|
"logps/chosen": -0.27403339743614197, |
|
"logps/rejected": -0.2716384530067444, |
|
"loss": 3.0444, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -2.7403340339660645, |
|
"rewards/margins": -0.02394939959049225, |
|
"rewards/rejected": -2.7163848876953125, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 39.10999969888965, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.0449364185333252, |
|
"logits/rejected": -0.9776930809020996, |
|
"logps/chosen": -0.29451489448547363, |
|
"logps/rejected": -0.2995792329311371, |
|
"loss": 3.0211, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.9451489448547363, |
|
"rewards/margins": 0.05064352601766586, |
|
"rewards/rejected": -2.9957923889160156, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 53.821066581509214, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.9672248959541321, |
|
"logits/rejected": -0.9867329597473145, |
|
"logps/chosen": -0.26386433839797974, |
|
"logps/rejected": -0.30063143372535706, |
|
"loss": 3.0404, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.638643503189087, |
|
"rewards/margins": 0.3676711320877075, |
|
"rewards/rejected": -3.006314516067505, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 86.6542555553414, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.9602643847465515, |
|
"logits/rejected": -0.9344671964645386, |
|
"logps/chosen": -0.2776374816894531, |
|
"logps/rejected": -0.29131022095680237, |
|
"loss": 2.9793, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.7763748168945312, |
|
"rewards/margins": 0.13672712445259094, |
|
"rewards/rejected": -2.913102149963379, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 56.919799993589805, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -1.0135596990585327, |
|
"logits/rejected": -0.9844949841499329, |
|
"logps/chosen": -0.2717221677303314, |
|
"logps/rejected": -0.2782990336418152, |
|
"loss": 3.124, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.717221736907959, |
|
"rewards/margins": 0.06576814502477646, |
|
"rewards/rejected": -2.7829902172088623, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 45.796379698409524, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -0.9898878931999207, |
|
"logits/rejected": -0.9455238580703735, |
|
"logps/chosen": -0.2733747959136963, |
|
"logps/rejected": -0.279060035943985, |
|
"loss": 2.8977, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -2.733747959136963, |
|
"rewards/margins": 0.05685253068804741, |
|
"rewards/rejected": -2.790600299835205, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 64.64288788170485, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -1.0491113662719727, |
|
"logits/rejected": -0.9738750457763672, |
|
"logps/chosen": -0.2941775918006897, |
|
"logps/rejected": -0.32069069147109985, |
|
"loss": 2.9119, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.9417757987976074, |
|
"rewards/margins": 0.2651310861110687, |
|
"rewards/rejected": -3.206906795501709, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 60.56769615337976, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -1.0074384212493896, |
|
"logits/rejected": -0.963466465473175, |
|
"logps/chosen": -0.2797192931175232, |
|
"logps/rejected": -0.3225395083427429, |
|
"loss": 2.9345, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.7971930503845215, |
|
"rewards/margins": 0.4282020032405853, |
|
"rewards/rejected": -3.2253952026367188, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 48.675093440338955, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -1.0469945669174194, |
|
"logits/rejected": -1.0040814876556396, |
|
"logps/chosen": -0.33255186676979065, |
|
"logps/rejected": -0.38402628898620605, |
|
"loss": 2.9815, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.3255183696746826, |
|
"rewards/margins": 0.5147446393966675, |
|
"rewards/rejected": -3.8402628898620605, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 92.08652708998007, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -1.038892388343811, |
|
"logits/rejected": -0.988103985786438, |
|
"logps/chosen": -0.34245526790618896, |
|
"logps/rejected": -0.38594862818717957, |
|
"loss": 3.0508, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -3.4245529174804688, |
|
"rewards/margins": 0.4349338412284851, |
|
"rewards/rejected": -3.8594863414764404, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 72.54827446103837, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -1.0567952394485474, |
|
"logits/rejected": -1.0215675830841064, |
|
"logps/chosen": -0.28753459453582764, |
|
"logps/rejected": -0.3490275740623474, |
|
"loss": 2.7982, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.8753461837768555, |
|
"rewards/margins": 0.6149295568466187, |
|
"rewards/rejected": -3.4902758598327637, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 55.56312267177659, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -1.0922194719314575, |
|
"logits/rejected": -1.059291958808899, |
|
"logps/chosen": -0.3225264847278595, |
|
"logps/rejected": -0.3470703959465027, |
|
"loss": 2.8716, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -3.22526478767395, |
|
"rewards/margins": 0.2454390972852707, |
|
"rewards/rejected": -3.4707038402557373, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 54.05440384507174, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -0.9975064992904663, |
|
"logits/rejected": -0.9689160585403442, |
|
"logps/chosen": -0.37468865513801575, |
|
"logps/rejected": -0.43205341696739197, |
|
"loss": 2.7901, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -3.746886730194092, |
|
"rewards/margins": 0.5736472010612488, |
|
"rewards/rejected": -4.3205342292785645, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 37.66775098927071, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -1.0229814052581787, |
|
"logits/rejected": -0.9982998967170715, |
|
"logps/chosen": -0.3514581620693207, |
|
"logps/rejected": -0.4274352192878723, |
|
"loss": 2.8718, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -3.5145821571350098, |
|
"rewards/margins": 0.7597699761390686, |
|
"rewards/rejected": -4.274352073669434, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 51.934633835606974, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -0.9838461875915527, |
|
"logits/rejected": -0.9134309887886047, |
|
"logps/chosen": -0.35928577184677124, |
|
"logps/rejected": -0.4099213182926178, |
|
"loss": 2.8345, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -3.592857837677002, |
|
"rewards/margins": 0.5063551664352417, |
|
"rewards/rejected": -4.099213123321533, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 46.83578017177419, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.9681940078735352, |
|
"logits/rejected": -0.9539217948913574, |
|
"logps/chosen": -0.353752076625824, |
|
"logps/rejected": -0.4523216187953949, |
|
"loss": 2.7878, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -3.53752064704895, |
|
"rewards/margins": 0.985695481300354, |
|
"rewards/rejected": -4.523216247558594, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 54.174949491419966, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -0.9862138628959656, |
|
"logits/rejected": -0.9641338586807251, |
|
"logps/chosen": -0.3405635952949524, |
|
"logps/rejected": -0.39860305190086365, |
|
"loss": 2.6715, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -3.4056358337402344, |
|
"rewards/margins": 0.5803946852684021, |
|
"rewards/rejected": -3.9860305786132812, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 62.18682762469074, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -1.028374195098877, |
|
"logits/rejected": -0.9928615689277649, |
|
"logps/chosen": -0.4192899763584137, |
|
"logps/rejected": -0.5028694868087769, |
|
"loss": 2.8803, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -4.192899703979492, |
|
"rewards/margins": 0.8357950448989868, |
|
"rewards/rejected": -5.028695106506348, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 56.712862810919404, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -1.102429747581482, |
|
"logits/rejected": -1.017956256866455, |
|
"logps/chosen": -0.4515204429626465, |
|
"logps/rejected": -0.49105948209762573, |
|
"loss": 2.7854, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -4.515204429626465, |
|
"rewards/margins": 0.39539000391960144, |
|
"rewards/rejected": -4.910594463348389, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 77.56651991727357, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -1.0002816915512085, |
|
"logits/rejected": -0.9756115078926086, |
|
"logps/chosen": -0.4405655860900879, |
|
"logps/rejected": -0.5030835866928101, |
|
"loss": 2.8381, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.405655860900879, |
|
"rewards/margins": 0.6251801252365112, |
|
"rewards/rejected": -5.0308356285095215, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 63.78609875386195, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -1.0079588890075684, |
|
"logits/rejected": -0.9540907144546509, |
|
"logps/chosen": -0.41310757398605347, |
|
"logps/rejected": -0.5235550999641418, |
|
"loss": 2.7704, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -4.131075859069824, |
|
"rewards/margins": 1.1044747829437256, |
|
"rewards/rejected": -5.235550880432129, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 59.92913033519696, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.9624613523483276, |
|
"logits/rejected": -0.9022065997123718, |
|
"logps/chosen": -0.4771413207054138, |
|
"logps/rejected": -0.6054214239120483, |
|
"loss": 2.6684, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -4.771413326263428, |
|
"rewards/margins": 1.2828001976013184, |
|
"rewards/rejected": -6.0542144775390625, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 57.71552130623015, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -1.0269968509674072, |
|
"logits/rejected": -0.9661616086959839, |
|
"logps/chosen": -0.5121074914932251, |
|
"logps/rejected": -0.578630268573761, |
|
"loss": 2.5559, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.121075630187988, |
|
"rewards/margins": 0.6652273535728455, |
|
"rewards/rejected": -5.7863030433654785, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 64.09249680400335, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -0.998211681842804, |
|
"logits/rejected": -0.9050429463386536, |
|
"logps/chosen": -0.5254617929458618, |
|
"logps/rejected": -0.7217136025428772, |
|
"loss": 2.4049, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -5.2546186447143555, |
|
"rewards/margins": 1.9625177383422852, |
|
"rewards/rejected": -7.217136383056641, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 54.10213565718134, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -1.06635320186615, |
|
"logits/rejected": -1.0216171741485596, |
|
"logps/chosen": -0.5953704714775085, |
|
"logps/rejected": -0.6902128458023071, |
|
"loss": 2.3251, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -5.953704833984375, |
|
"rewards/margins": 0.9484230875968933, |
|
"rewards/rejected": -6.902127742767334, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 87.63946362541415, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -1.0916361808776855, |
|
"logits/rejected": -1.085458517074585, |
|
"logps/chosen": -0.588487446308136, |
|
"logps/rejected": -0.8501450419425964, |
|
"loss": 2.1826, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -5.884873867034912, |
|
"rewards/margins": 2.6165759563446045, |
|
"rewards/rejected": -8.501450538635254, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 66.78226800807278, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -1.067176342010498, |
|
"logits/rejected": -1.0195186138153076, |
|
"logps/chosen": -0.6727192401885986, |
|
"logps/rejected": -0.8245170712471008, |
|
"loss": 2.1861, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -6.7271928787231445, |
|
"rewards/margins": 1.5179781913757324, |
|
"rewards/rejected": -8.245170593261719, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 77.9071558548112, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -1.1569595336914062, |
|
"logits/rejected": -1.1336597204208374, |
|
"logps/chosen": -0.785293698310852, |
|
"logps/rejected": -0.9337224960327148, |
|
"loss": 2.1564, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -7.852936744689941, |
|
"rewards/margins": 1.4842884540557861, |
|
"rewards/rejected": -9.337224960327148, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 109.08809267522398, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -1.0582095384597778, |
|
"logits/rejected": -1.03193998336792, |
|
"logps/chosen": -0.827114462852478, |
|
"logps/rejected": -1.0477594137191772, |
|
"loss": 2.0118, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -8.27114486694336, |
|
"rewards/margins": 2.206449508666992, |
|
"rewards/rejected": -10.477594375610352, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 72.02715367718524, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -1.0821495056152344, |
|
"logits/rejected": -1.0622715950012207, |
|
"logps/chosen": -0.9100320935249329, |
|
"logps/rejected": -1.1453698873519897, |
|
"loss": 2.0273, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -9.100319862365723, |
|
"rewards/margins": 2.353379011154175, |
|
"rewards/rejected": -11.453699111938477, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 70.83089987980944, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -1.092185139656067, |
|
"logits/rejected": -1.070657730102539, |
|
"logps/chosen": -0.9754332304000854, |
|
"logps/rejected": -1.2774028778076172, |
|
"loss": 2.0633, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -9.754331588745117, |
|
"rewards/margins": 3.0196967124938965, |
|
"rewards/rejected": -12.774029731750488, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 89.85353120616982, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -1.107634425163269, |
|
"logits/rejected": -1.0893046855926514, |
|
"logps/chosen": -1.0988253355026245, |
|
"logps/rejected": -1.4862325191497803, |
|
"loss": 2.0523, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -10.988253593444824, |
|
"rewards/margins": 3.8740711212158203, |
|
"rewards/rejected": -14.862322807312012, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 60.873789300571126, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -1.1269104480743408, |
|
"logits/rejected": -1.1010853052139282, |
|
"logps/chosen": -1.0796130895614624, |
|
"logps/rejected": -1.4461021423339844, |
|
"loss": 1.8838, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -10.796131134033203, |
|
"rewards/margins": 3.6648898124694824, |
|
"rewards/rejected": -14.461019515991211, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 77.23211870911884, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -1.1438876390457153, |
|
"logits/rejected": -1.1206210851669312, |
|
"logps/chosen": -1.0647801160812378, |
|
"logps/rejected": -1.4476187229156494, |
|
"loss": 1.8488, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -10.647802352905273, |
|
"rewards/margins": 3.828385591506958, |
|
"rewards/rejected": -14.476186752319336, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 97.35090322598491, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -1.157462239265442, |
|
"logits/rejected": -1.1056431531906128, |
|
"logps/chosen": -1.1336826086044312, |
|
"logps/rejected": -1.3956897258758545, |
|
"loss": 1.7083, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -11.336827278137207, |
|
"rewards/margins": 2.620070695877075, |
|
"rewards/rejected": -13.956896781921387, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 70.60533034676232, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -1.1124871969223022, |
|
"logits/rejected": -1.0904567241668701, |
|
"logps/chosen": -1.0966602563858032, |
|
"logps/rejected": -1.4549492597579956, |
|
"loss": 1.6569, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -10.966601371765137, |
|
"rewards/margins": 3.5828919410705566, |
|
"rewards/rejected": -14.549494743347168, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 84.92007593834019, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -1.1567202806472778, |
|
"logits/rejected": -1.0984870195388794, |
|
"logps/chosen": -1.1207507848739624, |
|
"logps/rejected": -1.5250511169433594, |
|
"loss": 1.6817, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -11.20750904083252, |
|
"rewards/margins": 4.043001651763916, |
|
"rewards/rejected": -15.250509262084961, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 82.82215861540205, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -1.1628299951553345, |
|
"logits/rejected": -1.170377492904663, |
|
"logps/chosen": -1.235033392906189, |
|
"logps/rejected": -1.7156970500946045, |
|
"loss": 1.5387, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -12.350334167480469, |
|
"rewards/margins": 4.806637763977051, |
|
"rewards/rejected": -17.156970977783203, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 70.50682719627838, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -1.1164333820343018, |
|
"logits/rejected": -1.0961400270462036, |
|
"logps/chosen": -1.2593460083007812, |
|
"logps/rejected": -1.6189504861831665, |
|
"loss": 1.6047, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -12.593461036682129, |
|
"rewards/margins": 3.5960440635681152, |
|
"rewards/rejected": -16.189504623413086, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 97.28442308133118, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -1.1036303043365479, |
|
"logits/rejected": -1.0911258459091187, |
|
"logps/chosen": -1.324706792831421, |
|
"logps/rejected": -1.7423721551895142, |
|
"loss": 1.5626, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -13.247068405151367, |
|
"rewards/margins": 4.176652908325195, |
|
"rewards/rejected": -17.42371940612793, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 85.54406338680343, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -1.154517650604248, |
|
"logits/rejected": -1.101162314414978, |
|
"logps/chosen": -1.40175461769104, |
|
"logps/rejected": -1.8435806035995483, |
|
"loss": 1.7321, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -14.017547607421875, |
|
"rewards/margins": 4.418261528015137, |
|
"rewards/rejected": -18.435808181762695, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 103.7420052940262, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -1.1762125492095947, |
|
"logits/rejected": -1.1645376682281494, |
|
"logps/chosen": -1.4059008359909058, |
|
"logps/rejected": -1.826703429222107, |
|
"loss": 1.6602, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.05901050567627, |
|
"rewards/margins": 4.208024024963379, |
|
"rewards/rejected": -18.267032623291016, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 115.44925865991426, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -1.1282669305801392, |
|
"logits/rejected": -1.1098558902740479, |
|
"logps/chosen": -1.4084670543670654, |
|
"logps/rejected": -1.8751609325408936, |
|
"loss": 1.5529, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.084672927856445, |
|
"rewards/margins": 4.666939735412598, |
|
"rewards/rejected": -18.751609802246094, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 94.83729222797992, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -1.191811442375183, |
|
"logits/rejected": -1.1707171201705933, |
|
"logps/chosen": -1.4761518239974976, |
|
"logps/rejected": -1.9565551280975342, |
|
"loss": 1.5052, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -14.761518478393555, |
|
"rewards/margins": 4.804032325744629, |
|
"rewards/rejected": -19.5655517578125, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 69.08600083744463, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -1.171081304550171, |
|
"logits/rejected": -1.1351138353347778, |
|
"logps/chosen": -1.4978052377700806, |
|
"logps/rejected": -1.93888258934021, |
|
"loss": 1.4742, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.978052139282227, |
|
"rewards/margins": 4.410772800445557, |
|
"rewards/rejected": -19.388826370239258, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 72.13177261697588, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -1.245228886604309, |
|
"logits/rejected": -1.1923692226409912, |
|
"logps/chosen": -1.4302809238433838, |
|
"logps/rejected": -1.8505923748016357, |
|
"loss": 1.4317, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -14.302810668945312, |
|
"rewards/margins": 4.203113555908203, |
|
"rewards/rejected": -18.505924224853516, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 78.71892029667256, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -1.2821385860443115, |
|
"logits/rejected": -1.2519080638885498, |
|
"logps/chosen": -1.4527919292449951, |
|
"logps/rejected": -1.9279251098632812, |
|
"loss": 1.4134, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -14.527920722961426, |
|
"rewards/margins": 4.751331329345703, |
|
"rewards/rejected": -19.279251098632812, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 87.04649782214463, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -1.245603322982788, |
|
"logits/rejected": -1.247234582901001, |
|
"logps/chosen": -1.3713314533233643, |
|
"logps/rejected": -1.8449758291244507, |
|
"loss": 1.4346, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -13.7133150100708, |
|
"rewards/margins": 4.736443042755127, |
|
"rewards/rejected": -18.449758529663086, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 120.6181547874012, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -1.3496326208114624, |
|
"logits/rejected": -1.2907614707946777, |
|
"logps/chosen": -1.440033197402954, |
|
"logps/rejected": -2.0060055255889893, |
|
"loss": 1.4071, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -14.4003324508667, |
|
"rewards/margins": 5.659722328186035, |
|
"rewards/rejected": -20.060054779052734, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 127.16635817286267, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -1.2846823930740356, |
|
"logits/rejected": -1.2672080993652344, |
|
"logps/chosen": -1.5308005809783936, |
|
"logps/rejected": -2.1003577709198, |
|
"loss": 1.3208, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.308004379272461, |
|
"rewards/margins": 5.695572853088379, |
|
"rewards/rejected": -21.003578186035156, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 306.6500775815346, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -1.3492941856384277, |
|
"logits/rejected": -1.3214812278747559, |
|
"logps/chosen": -1.6527442932128906, |
|
"logps/rejected": -2.1238582134246826, |
|
"loss": 1.4957, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -16.527442932128906, |
|
"rewards/margins": 4.711141586303711, |
|
"rewards/rejected": -21.238582611083984, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 89.88872208917493, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -1.3080346584320068, |
|
"logits/rejected": -1.2766286134719849, |
|
"logps/chosen": -1.5681380033493042, |
|
"logps/rejected": -2.0582587718963623, |
|
"loss": 1.4255, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.681379318237305, |
|
"rewards/margins": 4.901208877563477, |
|
"rewards/rejected": -20.58258819580078, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 86.94866969630735, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -1.2753899097442627, |
|
"logits/rejected": -1.2558867931365967, |
|
"logps/chosen": -1.668534278869629, |
|
"logps/rejected": -2.181380271911621, |
|
"loss": 1.4204, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.68534278869629, |
|
"rewards/margins": 5.1284589767456055, |
|
"rewards/rejected": -21.813800811767578, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 144.84472573271995, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -1.2718496322631836, |
|
"logits/rejected": -1.2400305271148682, |
|
"logps/chosen": -1.4821763038635254, |
|
"logps/rejected": -2.010958194732666, |
|
"loss": 1.3379, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -14.82176399230957, |
|
"rewards/margins": 5.287820816040039, |
|
"rewards/rejected": -20.109582901000977, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 73.43309027045284, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -1.2501633167266846, |
|
"logits/rejected": -1.2101550102233887, |
|
"logps/chosen": -1.4654467105865479, |
|
"logps/rejected": -1.9191405773162842, |
|
"loss": 1.4963, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -14.654467582702637, |
|
"rewards/margins": 4.536937713623047, |
|
"rewards/rejected": -19.191404342651367, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 118.57888241178858, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -1.3355966806411743, |
|
"logits/rejected": -1.3093878030776978, |
|
"logps/chosen": -1.5047754049301147, |
|
"logps/rejected": -2.050473213195801, |
|
"loss": 1.3782, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.047755241394043, |
|
"rewards/margins": 5.456977844238281, |
|
"rewards/rejected": -20.50473403930664, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 100.79377019073691, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -1.307716965675354, |
|
"logits/rejected": -1.2712657451629639, |
|
"logps/chosen": -1.5421284437179565, |
|
"logps/rejected": -2.111297845840454, |
|
"loss": 1.1768, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -15.421285629272461, |
|
"rewards/margins": 5.6916913986206055, |
|
"rewards/rejected": -21.112977981567383, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 102.72150408454053, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -1.333150863647461, |
|
"logits/rejected": -1.2860305309295654, |
|
"logps/chosen": -1.5572869777679443, |
|
"logps/rejected": -2.028750419616699, |
|
"loss": 1.3993, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.572871208190918, |
|
"rewards/margins": 4.714633464813232, |
|
"rewards/rejected": -20.287504196166992, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 96.32710532692002, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -1.2499698400497437, |
|
"logits/rejected": -1.2331962585449219, |
|
"logps/chosen": -1.5715104341506958, |
|
"logps/rejected": -2.08168625831604, |
|
"loss": 1.2236, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.715105056762695, |
|
"rewards/margins": 5.101758003234863, |
|
"rewards/rejected": -20.81686019897461, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 86.57462147935358, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -1.1837140321731567, |
|
"logits/rejected": -1.156435251235962, |
|
"logps/chosen": -1.662043809890747, |
|
"logps/rejected": -2.1141371726989746, |
|
"loss": 1.6291, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -16.620437622070312, |
|
"rewards/margins": 4.52093505859375, |
|
"rewards/rejected": -21.141372680664062, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 80.28274687652879, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -1.2831798791885376, |
|
"logits/rejected": -1.2358052730560303, |
|
"logps/chosen": -1.636275053024292, |
|
"logps/rejected": -2.093479871749878, |
|
"loss": 1.3641, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -16.362751007080078, |
|
"rewards/margins": 4.572048664093018, |
|
"rewards/rejected": -20.93480110168457, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 96.95154393343023, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -1.3118140697479248, |
|
"logits/rejected": -1.2903715372085571, |
|
"logps/chosen": -1.6529546976089478, |
|
"logps/rejected": -2.148355007171631, |
|
"loss": 1.4766, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.529544830322266, |
|
"rewards/margins": 4.954004764556885, |
|
"rewards/rejected": -21.483551025390625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 91.51736686071692, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -1.3063311576843262, |
|
"logits/rejected": -1.2485519647598267, |
|
"logps/chosen": -1.6333932876586914, |
|
"logps/rejected": -2.1507859230041504, |
|
"loss": 1.3963, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.333934783935547, |
|
"rewards/margins": 5.173925876617432, |
|
"rewards/rejected": -21.507858276367188, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 97.07913178610919, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -1.2934232950210571, |
|
"logits/rejected": -1.2893450260162354, |
|
"logps/chosen": -1.6584867238998413, |
|
"logps/rejected": -2.2650082111358643, |
|
"loss": 1.3115, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -16.584867477416992, |
|
"rewards/margins": 6.065215110778809, |
|
"rewards/rejected": -22.650081634521484, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 65.85264295945626, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -1.326992392539978, |
|
"logits/rejected": -1.2749508619308472, |
|
"logps/chosen": -1.7087081670761108, |
|
"logps/rejected": -2.377331256866455, |
|
"loss": 1.307, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -17.087081909179688, |
|
"rewards/margins": 6.6862287521362305, |
|
"rewards/rejected": -23.773311614990234, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 71.08108071468983, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -1.2321017980575562, |
|
"logits/rejected": -1.1879392862319946, |
|
"logps/chosen": -1.6843183040618896, |
|
"logps/rejected": -2.170222759246826, |
|
"loss": 1.3597, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.843183517456055, |
|
"rewards/margins": 4.859041690826416, |
|
"rewards/rejected": -21.702226638793945, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 85.80290375242986, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -1.2848079204559326, |
|
"logits/rejected": -1.2643808126449585, |
|
"logps/chosen": -1.6716737747192383, |
|
"logps/rejected": -2.2179079055786133, |
|
"loss": 1.2118, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.716739654541016, |
|
"rewards/margins": 5.46234130859375, |
|
"rewards/rejected": -22.179079055786133, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 106.87884023285183, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -1.3009603023529053, |
|
"logits/rejected": -1.2630964517593384, |
|
"logps/chosen": -1.5984188318252563, |
|
"logps/rejected": -2.146073579788208, |
|
"loss": 1.3799, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.984187126159668, |
|
"rewards/margins": 5.476546287536621, |
|
"rewards/rejected": -21.460735321044922, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 81.57738599240237, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -1.3159044981002808, |
|
"logits/rejected": -1.2908227443695068, |
|
"logps/chosen": -1.6741054058074951, |
|
"logps/rejected": -2.3151228427886963, |
|
"loss": 1.4007, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -16.741056442260742, |
|
"rewards/margins": 6.4101715087890625, |
|
"rewards/rejected": -23.151227951049805, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 137.46788470613842, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -1.3111270666122437, |
|
"logits/rejected": -1.3133299350738525, |
|
"logps/chosen": -1.55172860622406, |
|
"logps/rejected": -2.0665595531463623, |
|
"loss": 1.3291, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.51728630065918, |
|
"rewards/margins": 5.148309707641602, |
|
"rewards/rejected": -20.66559410095215, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 73.0747912837978, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -1.2305195331573486, |
|
"logits/rejected": -1.2432688474655151, |
|
"logps/chosen": -1.5626884698867798, |
|
"logps/rejected": -2.1072001457214355, |
|
"loss": 1.159, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.626884460449219, |
|
"rewards/margins": 5.445114612579346, |
|
"rewards/rejected": -21.071998596191406, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 82.19549372560476, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -1.2950479984283447, |
|
"logits/rejected": -1.2918254137039185, |
|
"logps/chosen": -1.5626431703567505, |
|
"logps/rejected": -2.192157030105591, |
|
"loss": 1.3653, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -15.626432418823242, |
|
"rewards/margins": 6.295140266418457, |
|
"rewards/rejected": -21.921573638916016, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 82.26556152038766, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -1.3408092260360718, |
|
"logits/rejected": -1.2676836252212524, |
|
"logps/chosen": -1.621119737625122, |
|
"logps/rejected": -2.2568397521972656, |
|
"loss": 1.2936, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.211196899414062, |
|
"rewards/margins": 6.35720157623291, |
|
"rewards/rejected": -22.56839942932129, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 97.91298047906564, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -1.2643686532974243, |
|
"logits/rejected": -1.2498524188995361, |
|
"logps/chosen": -1.578148603439331, |
|
"logps/rejected": -2.067432403564453, |
|
"loss": 1.3095, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.781486511230469, |
|
"rewards/margins": 4.892836093902588, |
|
"rewards/rejected": -20.6743221282959, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 76.40375667456833, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -1.2894870042800903, |
|
"logits/rejected": -1.2930238246917725, |
|
"logps/chosen": -1.7195911407470703, |
|
"logps/rejected": -2.293926954269409, |
|
"loss": 1.2661, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -17.195911407470703, |
|
"rewards/margins": 5.7433576583862305, |
|
"rewards/rejected": -22.939268112182617, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 128.55014662844385, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -1.3014891147613525, |
|
"logits/rejected": -1.2802826166152954, |
|
"logps/chosen": -1.5879671573638916, |
|
"logps/rejected": -2.113447666168213, |
|
"loss": 1.4202, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -15.879669189453125, |
|
"rewards/margins": 5.25480842590332, |
|
"rewards/rejected": -21.134477615356445, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 77.57546829061782, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -1.310450792312622, |
|
"logits/rejected": -1.2848607301712036, |
|
"logps/chosen": -1.6349436044692993, |
|
"logps/rejected": -2.3224172592163086, |
|
"loss": 1.1354, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -16.349435806274414, |
|
"rewards/margins": 6.8747382164001465, |
|
"rewards/rejected": -23.224172592163086, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 75.76498018298135, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -1.3069926500320435, |
|
"logits/rejected": -1.289568305015564, |
|
"logps/chosen": -1.699163794517517, |
|
"logps/rejected": -2.2884535789489746, |
|
"loss": 1.3105, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.99163818359375, |
|
"rewards/margins": 5.8928985595703125, |
|
"rewards/rejected": -22.884536743164062, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 74.3951066976334, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -1.3087493181228638, |
|
"logits/rejected": -1.2855933904647827, |
|
"logps/chosen": -1.6192277669906616, |
|
"logps/rejected": -2.1721370220184326, |
|
"loss": 1.2688, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.192277908325195, |
|
"rewards/margins": 5.5290937423706055, |
|
"rewards/rejected": -21.721370697021484, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 63.10639530225684, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -1.3169952630996704, |
|
"logits/rejected": -1.2985506057739258, |
|
"logps/chosen": -1.6467092037200928, |
|
"logps/rejected": -2.164301633834839, |
|
"loss": 1.2114, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -16.467090606689453, |
|
"rewards/margins": 5.17592716217041, |
|
"rewards/rejected": -21.643016815185547, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": -1.4850261211395264, |
|
"eval_logits/rejected": -1.493988037109375, |
|
"eval_logps/chosen": -1.664995551109314, |
|
"eval_logps/rejected": -2.2206830978393555, |
|
"eval_loss": 1.2490928173065186, |
|
"eval_rewards/accuracies": 0.8414633870124817, |
|
"eval_rewards/chosen": -16.64995574951172, |
|
"eval_rewards/margins": 5.556875228881836, |
|
"eval_rewards/rejected": -22.206830978393555, |
|
"eval_runtime": 95.4555, |
|
"eval_samples_per_second": 20.544, |
|
"eval_steps_per_second": 1.289, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 102.60713365281785, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -1.267327904701233, |
|
"logits/rejected": -1.2850300073623657, |
|
"logps/chosen": -1.7324796915054321, |
|
"logps/rejected": -2.2837843894958496, |
|
"loss": 1.2554, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -17.32479476928711, |
|
"rewards/margins": 5.513047218322754, |
|
"rewards/rejected": -22.83784294128418, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 159.07261192162792, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -1.2908105850219727, |
|
"logits/rejected": -1.2769014835357666, |
|
"logps/chosen": -1.6668212413787842, |
|
"logps/rejected": -2.2075092792510986, |
|
"loss": 1.3804, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.668210983276367, |
|
"rewards/margins": 5.406882286071777, |
|
"rewards/rejected": -22.075092315673828, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 76.85488373819665, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -1.2890928983688354, |
|
"logits/rejected": -1.2320820093154907, |
|
"logps/chosen": -1.5973718166351318, |
|
"logps/rejected": -2.237947940826416, |
|
"loss": 1.4163, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.973716735839844, |
|
"rewards/margins": 6.40576171875, |
|
"rewards/rejected": -22.37947654724121, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 92.42320617715352, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -1.3191107511520386, |
|
"logits/rejected": -1.2656759023666382, |
|
"logps/chosen": -1.5610657930374146, |
|
"logps/rejected": -2.152204990386963, |
|
"loss": 1.2658, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -15.610658645629883, |
|
"rewards/margins": 5.911390781402588, |
|
"rewards/rejected": -21.522048950195312, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 102.84147971960329, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -1.3186463117599487, |
|
"logits/rejected": -1.3073859214782715, |
|
"logps/chosen": -1.736202597618103, |
|
"logps/rejected": -2.2703185081481934, |
|
"loss": 1.4248, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -17.362024307250977, |
|
"rewards/margins": 5.34116268157959, |
|
"rewards/rejected": -22.70318603515625, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 108.04777919102577, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -1.2982522249221802, |
|
"logits/rejected": -1.2813619375228882, |
|
"logps/chosen": -1.7414271831512451, |
|
"logps/rejected": -2.3307671546936035, |
|
"loss": 1.2802, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -17.41427230834961, |
|
"rewards/margins": 5.893403053283691, |
|
"rewards/rejected": -23.30767250061035, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 85.6236171514638, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -1.262458324432373, |
|
"logits/rejected": -1.2183687686920166, |
|
"logps/chosen": -1.5962882041931152, |
|
"logps/rejected": -2.182863712310791, |
|
"loss": 1.222, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -15.962882995605469, |
|
"rewards/margins": 5.865753650665283, |
|
"rewards/rejected": -21.828638076782227, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 88.93173263482028, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -1.2786242961883545, |
|
"logits/rejected": -1.2167497873306274, |
|
"logps/chosen": -1.7170331478118896, |
|
"logps/rejected": -2.2510578632354736, |
|
"loss": 1.2694, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -17.170331954956055, |
|
"rewards/margins": 5.340245723724365, |
|
"rewards/rejected": -22.510578155517578, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 80.06878550984797, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -1.229853868484497, |
|
"logits/rejected": -1.2106773853302002, |
|
"logps/chosen": -1.688746690750122, |
|
"logps/rejected": -2.332123279571533, |
|
"loss": 1.1496, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -16.88746452331543, |
|
"rewards/margins": 6.433764457702637, |
|
"rewards/rejected": -23.321231842041016, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 87.87225651237878, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -1.3426064252853394, |
|
"logits/rejected": -1.3202402591705322, |
|
"logps/chosen": -1.6350570917129517, |
|
"logps/rejected": -2.1853957176208496, |
|
"loss": 1.3199, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.350570678710938, |
|
"rewards/margins": 5.503388404846191, |
|
"rewards/rejected": -21.853958129882812, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 97.584405727653, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -1.3094408512115479, |
|
"logits/rejected": -1.2717828750610352, |
|
"logps/chosen": -1.6554279327392578, |
|
"logps/rejected": -2.304875373840332, |
|
"loss": 1.0871, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -16.554279327392578, |
|
"rewards/margins": 6.494471549987793, |
|
"rewards/rejected": -23.048751831054688, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 94.99231466494224, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": -1.3035484552383423, |
|
"logits/rejected": -1.2918545007705688, |
|
"logps/chosen": -1.7271077632904053, |
|
"logps/rejected": -2.327470541000366, |
|
"loss": 1.2419, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -17.27107810974121, |
|
"rewards/margins": 6.003628730773926, |
|
"rewards/rejected": -23.27470588684082, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 86.3036149732278, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -1.286787986755371, |
|
"logits/rejected": -1.2908694744110107, |
|
"logps/chosen": -1.7435226440429688, |
|
"logps/rejected": -2.3610475063323975, |
|
"loss": 1.3748, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -17.435226440429688, |
|
"rewards/margins": 6.175250053405762, |
|
"rewards/rejected": -23.610477447509766, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.8360214427400707, |
|
"train_runtime": 11486.9698, |
|
"train_samples_per_second": 5.213, |
|
"train_steps_per_second": 0.041 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|