|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 7.822805657905783, |
|
"learning_rate": 6.382978723404255e-08, |
|
"logits/chosen": 0.06214674562215805, |
|
"logits/rejected": 0.03797388821840286, |
|
"logps/chosen": -0.2699491083621979, |
|
"logps/rejected": -0.26826155185699463, |
|
"loss": 1.2748, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.2699491083621979, |
|
"rewards/margins": -0.0016875670989975333, |
|
"rewards/rejected": -0.26826155185699463, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 5.0967725327137074, |
|
"learning_rate": 1.276595744680851e-07, |
|
"logits/chosen": -0.010526341386139393, |
|
"logits/rejected": -0.012353870086371899, |
|
"logps/chosen": -0.2696549892425537, |
|
"logps/rejected": -0.2676162123680115, |
|
"loss": 1.2725, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.2696549892425537, |
|
"rewards/margins": -0.0020388036500662565, |
|
"rewards/rejected": -0.2676162123680115, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 6.6390016305878055, |
|
"learning_rate": 1.9148936170212767e-07, |
|
"logits/chosen": 0.0009885445469990373, |
|
"logits/rejected": 0.00387256289832294, |
|
"logps/chosen": -0.2789618670940399, |
|
"logps/rejected": -0.2836909592151642, |
|
"loss": 1.2796, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.2789618670940399, |
|
"rewards/margins": 0.00472906231880188, |
|
"rewards/rejected": -0.2836909592151642, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 9.068174455913743, |
|
"learning_rate": 2.553191489361702e-07, |
|
"logits/chosen": -0.06325958669185638, |
|
"logits/rejected": -0.06925094127655029, |
|
"logps/chosen": -0.2819739878177643, |
|
"logps/rejected": -0.2899174988269806, |
|
"loss": 1.2759, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.2819739878177643, |
|
"rewards/margins": 0.007943493314087391, |
|
"rewards/rejected": -0.2899174988269806, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 4.867685031446897, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.06868849694728851, |
|
"logits/rejected": -0.04817543178796768, |
|
"logps/chosen": -0.25565916299819946, |
|
"logps/rejected": -0.2749556303024292, |
|
"loss": 1.2567, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.25565916299819946, |
|
"rewards/margins": 0.01929648406803608, |
|
"rewards/rejected": -0.2749556303024292, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 4.806810486248379, |
|
"learning_rate": 3.8297872340425535e-07, |
|
"logits/chosen": -0.014168953523039818, |
|
"logits/rejected": -0.00634436309337616, |
|
"logps/chosen": -0.2789873480796814, |
|
"logps/rejected": -0.2939203977584839, |
|
"loss": 1.2769, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2789873480796814, |
|
"rewards/margins": 0.01493304967880249, |
|
"rewards/rejected": -0.2939203977584839, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 7.01406965287447, |
|
"learning_rate": 4.4680851063829783e-07, |
|
"logits/chosen": -0.029415354132652283, |
|
"logits/rejected": -0.009010488167405128, |
|
"logps/chosen": -0.2785240411758423, |
|
"logps/rejected": -0.29580387473106384, |
|
"loss": 1.2752, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.2785240411758423, |
|
"rewards/margins": 0.01727980747818947, |
|
"rewards/rejected": -0.29580387473106384, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 5.373513139182376, |
|
"learning_rate": 5.106382978723404e-07, |
|
"logits/chosen": -0.06608792394399643, |
|
"logits/rejected": -0.07190172374248505, |
|
"logps/chosen": -0.26092082262039185, |
|
"logps/rejected": -0.2700851261615753, |
|
"loss": 1.2674, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.26092082262039185, |
|
"rewards/margins": 0.009164294227957726, |
|
"rewards/rejected": -0.2700851261615753, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 5.609168218812681, |
|
"learning_rate": 5.74468085106383e-07, |
|
"logits/chosen": -0.05338377505540848, |
|
"logits/rejected": -0.01094720046967268, |
|
"logps/chosen": -0.2855256199836731, |
|
"logps/rejected": -0.28623315691947937, |
|
"loss": 1.2738, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.2855256199836731, |
|
"rewards/margins": 0.0007075363537296653, |
|
"rewards/rejected": -0.28623315691947937, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 4.336678776172025, |
|
"learning_rate": 5.999244704827519e-07, |
|
"logits/chosen": -0.017125016078352928, |
|
"logits/rejected": -0.004308671224862337, |
|
"logps/chosen": -0.28561219573020935, |
|
"logps/rejected": -0.299736350774765, |
|
"loss": 1.2688, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.28561219573020935, |
|
"rewards/margins": 0.014124127104878426, |
|
"rewards/rejected": -0.299736350774765, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 5.882470821439722, |
|
"learning_rate": 5.994630389303205e-07, |
|
"logits/chosen": 0.0162811242043972, |
|
"logits/rejected": -0.004544490482658148, |
|
"logps/chosen": -0.2731076776981354, |
|
"logps/rejected": -0.2809983193874359, |
|
"loss": 1.2631, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2731076776981354, |
|
"rewards/margins": 0.007890653796494007, |
|
"rewards/rejected": -0.2809983193874359, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 6.092605492824151, |
|
"learning_rate": 5.985827812395378e-07, |
|
"logits/chosen": -0.03923701494932175, |
|
"logits/rejected": -0.07081723213195801, |
|
"logps/chosen": -0.2858438491821289, |
|
"logps/rejected": -0.31485337018966675, |
|
"loss": 1.2628, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.2858438491821289, |
|
"rewards/margins": 0.029009530320763588, |
|
"rewards/rejected": -0.31485337018966675, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 5.246427419034069, |
|
"learning_rate": 5.972849285303804e-07, |
|
"logits/chosen": -0.024546677246689796, |
|
"logits/rejected": 0.03360120207071304, |
|
"logps/chosen": -0.29182225465774536, |
|
"logps/rejected": -0.31506821513175964, |
|
"loss": 1.2705, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.29182225465774536, |
|
"rewards/margins": 0.023245956748723984, |
|
"rewards/rejected": -0.31506821513175964, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 5.0996411858772115, |
|
"learning_rate": 5.955712959672177e-07, |
|
"logits/chosen": -0.016444489359855652, |
|
"logits/rejected": -0.020679041743278503, |
|
"logps/chosen": -0.29391151666641235, |
|
"logps/rejected": -0.3471246361732483, |
|
"loss": 1.2543, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.29391151666641235, |
|
"rewards/margins": 0.05321308970451355, |
|
"rewards/rejected": -0.3471246361732483, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 4.491905521876928, |
|
"learning_rate": 5.934442802201417e-07, |
|
"logits/chosen": 0.06254759430885315, |
|
"logits/rejected": 0.10311929881572723, |
|
"logps/chosen": -0.3031434714794159, |
|
"logps/rejected": -0.3355598449707031, |
|
"loss": 1.2679, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3031434714794159, |
|
"rewards/margins": 0.03241636976599693, |
|
"rewards/rejected": -0.3355598449707031, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 6.478010906716982, |
|
"learning_rate": 5.909068561130061e-07, |
|
"logits/chosen": -0.01297207735478878, |
|
"logits/rejected": -0.004632393829524517, |
|
"logps/chosen": -0.29228898882865906, |
|
"logps/rejected": -0.322248637676239, |
|
"loss": 1.2618, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.29228898882865906, |
|
"rewards/margins": 0.02995964325964451, |
|
"rewards/rejected": -0.322248637676239, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 4.650638031490373, |
|
"learning_rate": 5.879625724628667e-07, |
|
"logits/chosen": 0.005947749130427837, |
|
"logits/rejected": 0.021510040387511253, |
|
"logps/chosen": -0.2952747642993927, |
|
"logps/rejected": -0.3337419927120209, |
|
"loss": 1.257, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2952747642993927, |
|
"rewards/margins": 0.03846726939082146, |
|
"rewards/rejected": -0.3337419927120209, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 7.530524241094077, |
|
"learning_rate": 5.846155471166399e-07, |
|
"logits/chosen": 0.015343578532338142, |
|
"logits/rejected": 0.03540420904755592, |
|
"logps/chosen": -0.3116888105869293, |
|
"logps/rejected": -0.3682340085506439, |
|
"loss": 1.253, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3116888105869293, |
|
"rewards/margins": 0.05654525011777878, |
|
"rewards/rejected": -0.3682340085506439, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 6.23873980938112, |
|
"learning_rate": 5.808704611919212e-07, |
|
"logits/chosen": 0.0079043535515666, |
|
"logits/rejected": -0.009995353408157825, |
|
"logps/chosen": -0.30725741386413574, |
|
"logps/rejected": -0.3175857663154602, |
|
"loss": 1.2597, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.30725741386413574, |
|
"rewards/margins": 0.010328322649002075, |
|
"rewards/rejected": -0.3175857663154602, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 5.495931507709334, |
|
"learning_rate": 5.767325525300187e-07, |
|
"logits/chosen": 0.012924237176775932, |
|
"logits/rejected": 0.015158179216086864, |
|
"logps/chosen": -0.30597418546676636, |
|
"logps/rejected": -0.3576403558254242, |
|
"loss": 1.2572, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.30597418546676636, |
|
"rewards/margins": 0.051666177809238434, |
|
"rewards/rejected": -0.3576403558254242, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 8.704066182889123, |
|
"learning_rate": 5.722076083703594e-07, |
|
"logits/chosen": -0.011864040978252888, |
|
"logits/rejected": -0.015826348215341568, |
|
"logps/chosen": -0.2861265540122986, |
|
"logps/rejected": -0.3439098000526428, |
|
"loss": 1.2455, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.2861265540122986, |
|
"rewards/margins": 0.057783275842666626, |
|
"rewards/rejected": -0.3439098000526428, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 6.081543266921472, |
|
"learning_rate": 5.673019572565103e-07, |
|
"logits/chosen": -0.024934740737080574, |
|
"logits/rejected": -0.036910589784383774, |
|
"logps/chosen": -0.29488444328308105, |
|
"logps/rejected": -0.3499029576778412, |
|
"loss": 1.2384, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.29488444328308105, |
|
"rewards/margins": 0.05501857399940491, |
|
"rewards/rejected": -0.3499029576778412, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 5.083503334201838, |
|
"learning_rate": 5.620224601851389e-07, |
|
"logits/chosen": 0.0035224161110818386, |
|
"logits/rejected": 0.001966515090316534, |
|
"logps/chosen": -0.30457058548927307, |
|
"logps/rejected": -0.35604608058929443, |
|
"loss": 1.2561, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.30457058548927307, |
|
"rewards/margins": 0.051475513726472855, |
|
"rewards/rejected": -0.35604608058929443, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 7.099362190379442, |
|
"learning_rate": 5.563765010102885e-07, |
|
"logits/chosen": -0.06543167680501938, |
|
"logits/rejected": -0.0410967655479908, |
|
"logps/chosen": -0.3293083906173706, |
|
"logps/rejected": -0.3675723075866699, |
|
"loss": 1.2552, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3293083906173706, |
|
"rewards/margins": 0.03826391324400902, |
|
"rewards/rejected": -0.3675723075866699, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 5.147990697882454, |
|
"learning_rate": 5.503719761163907e-07, |
|
"logits/chosen": -0.10343233495950699, |
|
"logits/rejected": -0.08113230764865875, |
|
"logps/chosen": -0.2962001860141754, |
|
"logps/rejected": -0.35733163356781006, |
|
"loss": 1.234, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.2962001860141754, |
|
"rewards/margins": 0.06113145500421524, |
|
"rewards/rejected": -0.35733163356781006, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 5.070905426510523, |
|
"learning_rate": 5.440172833744582e-07, |
|
"logits/chosen": -0.059284817427396774, |
|
"logits/rejected": -0.020249750465154648, |
|
"logps/chosen": -0.3295074701309204, |
|
"logps/rejected": -0.37299367785453796, |
|
"loss": 1.2644, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3295074701309204, |
|
"rewards/margins": 0.04348624125123024, |
|
"rewards/rejected": -0.37299367785453796, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 8.434154523088012, |
|
"learning_rate": 5.373213103969024e-07, |
|
"logits/chosen": -0.09271787106990814, |
|
"logits/rejected": -0.0978003442287445, |
|
"logps/chosen": -0.3175578713417053, |
|
"logps/rejected": -0.3870469629764557, |
|
"loss": 1.2466, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3175578713417053, |
|
"rewards/margins": 0.06948906183242798, |
|
"rewards/rejected": -0.3870469629764557, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 8.050573635697841, |
|
"learning_rate": 5.302934221074033e-07, |
|
"logits/chosen": -0.18472157418727875, |
|
"logits/rejected": -0.18296249210834503, |
|
"logps/chosen": -0.35015708208084106, |
|
"logps/rejected": -0.418283075094223, |
|
"loss": 1.2553, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.35015708208084106, |
|
"rewards/margins": 0.06812603026628494, |
|
"rewards/rejected": -0.418283075094223, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 5.537790942631876, |
|
"learning_rate": 5.229434476432182e-07, |
|
"logits/chosen": -0.04427188262343407, |
|
"logits/rejected": -0.07002754509449005, |
|
"logps/chosen": -0.3104066252708435, |
|
"logps/rejected": -0.36531931161880493, |
|
"loss": 1.2393, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3104066252708435, |
|
"rewards/margins": 0.05491270869970322, |
|
"rewards/rejected": -0.36531931161880493, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 13.46796536991619, |
|
"learning_rate": 5.152816666082435e-07, |
|
"logits/chosen": -0.09154470264911652, |
|
"logits/rejected": -0.10489149391651154, |
|
"logps/chosen": -0.3233293890953064, |
|
"logps/rejected": -0.42510905861854553, |
|
"loss": 1.2451, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3233293890953064, |
|
"rewards/margins": 0.10177962481975555, |
|
"rewards/rejected": -0.42510905861854553, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 7.7500923534994985, |
|
"learning_rate": 5.073187946960594e-07, |
|
"logits/chosen": -0.08783230930566788, |
|
"logits/rejected": -0.08713527768850327, |
|
"logps/chosen": -0.3179479241371155, |
|
"logps/rejected": -0.3679467737674713, |
|
"loss": 1.2488, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.3179479241371155, |
|
"rewards/margins": 0.04999883845448494, |
|
"rewards/rejected": -0.3679467737674713, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 7.025909512719848, |
|
"learning_rate": 4.990659687030634e-07, |
|
"logits/chosen": -0.1076837033033371, |
|
"logits/rejected": -0.08046683669090271, |
|
"logps/chosen": -0.31321102380752563, |
|
"logps/rejected": -0.3762710690498352, |
|
"loss": 1.2529, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.31321102380752563, |
|
"rewards/margins": 0.06306007504463196, |
|
"rewards/rejected": -0.3762710690498352, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 8.01239629928041, |
|
"learning_rate": 4.905347309526536e-07, |
|
"logits/chosen": -0.06136934086680412, |
|
"logits/rejected": -0.07382142543792725, |
|
"logps/chosen": -0.29660579562187195, |
|
"logps/rejected": -0.37172654271125793, |
|
"loss": 1.2427, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.29660579562187195, |
|
"rewards/margins": 0.07512073218822479, |
|
"rewards/rejected": -0.37172654271125793, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 5.642901073511358, |
|
"learning_rate": 4.817370131522459e-07, |
|
"logits/chosen": -0.0563310906291008, |
|
"logits/rejected": -0.030183713883161545, |
|
"logps/chosen": -0.31852108240127563, |
|
"logps/rejected": -0.39116546511650085, |
|
"loss": 1.2442, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.31852108240127563, |
|
"rewards/margins": 0.07264441251754761, |
|
"rewards/rejected": -0.39116546511650085, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 5.485147120245967, |
|
"learning_rate": 4.7268511970570207e-07, |
|
"logits/chosen": -0.08929944038391113, |
|
"logits/rejected": -0.08389794826507568, |
|
"logps/chosen": -0.30848273634910583, |
|
"logps/rejected": -0.3659656345844269, |
|
"loss": 1.2505, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.30848273634910583, |
|
"rewards/margins": 0.05748288705945015, |
|
"rewards/rejected": -0.3659656345844269, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 7.991274503438784, |
|
"learning_rate": 4.6339171050450815e-07, |
|
"logits/chosen": -0.10145304352045059, |
|
"logits/rejected": -0.09222683310508728, |
|
"logps/chosen": -0.31844446063041687, |
|
"logps/rejected": -0.3609256148338318, |
|
"loss": 1.2546, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.31844446063041687, |
|
"rewards/margins": 0.042481135576963425, |
|
"rewards/rejected": -0.3609256148338318, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 5.832967942513168, |
|
"learning_rate": 4.5386978322177184e-07, |
|
"logits/chosen": -0.051486529409885406, |
|
"logits/rejected": -0.07657450437545776, |
|
"logps/chosen": -0.3131783604621887, |
|
"logps/rejected": -0.35496917366981506, |
|
"loss": 1.2627, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.3131783604621887, |
|
"rewards/margins": 0.04179079458117485, |
|
"rewards/rejected": -0.35496917366981506, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 8.218974322062975, |
|
"learning_rate": 4.4413265513380134e-07, |
|
"logits/chosen": -0.08528328686952591, |
|
"logits/rejected": -0.0627092644572258, |
|
"logps/chosen": -0.3054826855659485, |
|
"logps/rejected": -0.37131738662719727, |
|
"loss": 1.2456, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.3054826855659485, |
|
"rewards/margins": 0.06583467870950699, |
|
"rewards/rejected": -0.37131738662719727, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 8.281751397477004, |
|
"learning_rate": 4.3419394449468975e-07, |
|
"logits/chosen": -0.0632157102227211, |
|
"logits/rejected": -0.039062272757291794, |
|
"logps/chosen": -0.3401602506637573, |
|
"logps/rejected": -0.42965516448020935, |
|
"loss": 1.2393, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.3401602506637573, |
|
"rewards/margins": 0.08949492126703262, |
|
"rewards/rejected": -0.42965516448020935, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 5.682547699952222, |
|
"learning_rate": 4.2406755148995617e-07, |
|
"logits/chosen": -0.036120522767305374, |
|
"logits/rejected": -0.00437445193529129, |
|
"logps/chosen": -0.30982089042663574, |
|
"logps/rejected": -0.3825121223926544, |
|
"loss": 1.2418, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.30982089042663574, |
|
"rewards/margins": 0.07269121706485748, |
|
"rewards/rejected": -0.3825121223926544, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 5.711897059557612, |
|
"learning_rate": 4.1376763879587855e-07, |
|
"logits/chosen": -0.08326585590839386, |
|
"logits/rejected": -0.12235681712627411, |
|
"logps/chosen": -0.34261685609817505, |
|
"logps/rejected": -0.4019942283630371, |
|
"loss": 1.2571, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.34261685609817505, |
|
"rewards/margins": 0.05937739089131355, |
|
"rewards/rejected": -0.4019942283630371, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 6.6702507208013895, |
|
"learning_rate": 4.0330861177171046e-07, |
|
"logits/chosen": -0.09191317856311798, |
|
"logits/rejected": -0.07536768168210983, |
|
"logps/chosen": -0.3210485577583313, |
|
"logps/rejected": -0.39176544547080994, |
|
"loss": 1.247, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3210485577583313, |
|
"rewards/margins": 0.07071693241596222, |
|
"rewards/rejected": -0.39176544547080994, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 6.330049251313848, |
|
"learning_rate": 3.927050983124842e-07, |
|
"logits/chosen": -0.025531485676765442, |
|
"logits/rejected": -0.07240410149097443, |
|
"logps/chosen": -0.29885441064834595, |
|
"logps/rejected": -0.391807496547699, |
|
"loss": 1.238, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.29885441064834595, |
|
"rewards/margins": 0.09295307099819183, |
|
"rewards/rejected": -0.391807496547699, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 6.737595050647263, |
|
"learning_rate": 3.8197192839057603e-07, |
|
"logits/chosen": -0.1071164608001709, |
|
"logits/rejected": -0.12290854752063751, |
|
"logps/chosen": -0.30930382013320923, |
|
"logps/rejected": -0.4362809658050537, |
|
"loss": 1.2382, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.30930382013320923, |
|
"rewards/margins": 0.12697716057300568, |
|
"rewards/rejected": -0.4362809658050537, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 8.559735052947849, |
|
"learning_rate": 3.7112411331464923e-07, |
|
"logits/chosen": -0.02524995245039463, |
|
"logits/rejected": -0.030149292200803757, |
|
"logps/chosen": -0.3149697184562683, |
|
"logps/rejected": -0.3956434428691864, |
|
"loss": 1.2272, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.3149697184562683, |
|
"rewards/margins": 0.0806737095117569, |
|
"rewards/rejected": -0.3956434428691864, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 6.480287157306168, |
|
"learning_rate": 3.601768247349818e-07, |
|
"logits/chosen": -0.03261668235063553, |
|
"logits/rejected": -0.08516497910022736, |
|
"logps/chosen": -0.3169209659099579, |
|
"logps/rejected": -0.3873901069164276, |
|
"loss": 1.2404, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3169209659099579, |
|
"rewards/margins": 0.07046912610530853, |
|
"rewards/rejected": -0.3873901069164276, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 8.225368883810985, |
|
"learning_rate": 3.491453734245413e-07, |
|
"logits/chosen": -0.06573788821697235, |
|
"logits/rejected": -0.0159236378967762, |
|
"logps/chosen": -0.3394278287887573, |
|
"logps/rejected": -0.4536859393119812, |
|
"loss": 1.2409, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3394278287887573, |
|
"rewards/margins": 0.11425812542438507, |
|
"rewards/rejected": -0.4536859393119812, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 6.0501816929552135, |
|
"learning_rate": 3.3804518786548455e-07, |
|
"logits/chosen": -0.09407626837491989, |
|
"logits/rejected": -0.07616542279720306, |
|
"logps/chosen": -0.3101692199707031, |
|
"logps/rejected": -0.428670734167099, |
|
"loss": 1.2453, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3101692199707031, |
|
"rewards/margins": 0.11850155889987946, |
|
"rewards/rejected": -0.428670734167099, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 5.475578816065332, |
|
"learning_rate": 3.2689179267103006e-07, |
|
"logits/chosen": -0.1301025003194809, |
|
"logits/rejected": -0.12063749134540558, |
|
"logps/chosen": -0.3209839463233948, |
|
"logps/rejected": -0.3626781404018402, |
|
"loss": 1.2299, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3209839463233948, |
|
"rewards/margins": 0.04169422388076782, |
|
"rewards/rejected": -0.3626781404018402, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 7.848337245875008, |
|
"learning_rate": 3.1570078687288317e-07, |
|
"logits/chosen": -0.07203061133623123, |
|
"logits/rejected": -0.07748202979564667, |
|
"logps/chosen": -0.34228605031967163, |
|
"logps/rejected": -0.46370163559913635, |
|
"loss": 1.2274, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.34228605031967163, |
|
"rewards/margins": 0.12141555547714233, |
|
"rewards/rejected": -0.46370163559913635, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 8.463852280152306, |
|
"learning_rate": 3.0448782210457906e-07, |
|
"logits/chosen": -0.07813692837953568, |
|
"logits/rejected": -0.07056453824043274, |
|
"logps/chosen": -0.3696078360080719, |
|
"logps/rejected": -0.4517177939414978, |
|
"loss": 1.2467, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3696078360080719, |
|
"rewards/margins": 0.08210990577936172, |
|
"rewards/rejected": -0.4517177939414978, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 8.673679370712454, |
|
"learning_rate": 2.932685807112585e-07, |
|
"logits/chosen": -0.13425521552562714, |
|
"logits/rejected": -0.13180285692214966, |
|
"logps/chosen": -0.3235534727573395, |
|
"logps/rejected": -0.4138403534889221, |
|
"loss": 1.2392, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.3235534727573395, |
|
"rewards/margins": 0.09028687328100204, |
|
"rewards/rejected": -0.4138403534889221, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 14.773817206579576, |
|
"learning_rate": 2.8205875381648974e-07, |
|
"logits/chosen": -0.10961911827325821, |
|
"logits/rejected": -0.10981354862451553, |
|
"logps/chosen": -0.31433889269828796, |
|
"logps/rejected": -0.40348243713378906, |
|
"loss": 1.2448, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.31433889269828796, |
|
"rewards/margins": 0.08914351463317871, |
|
"rewards/rejected": -0.40348243713378906, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 10.511806133556902, |
|
"learning_rate": 2.708740193768135e-07, |
|
"logits/chosen": -0.08152172714471817, |
|
"logits/rejected": -0.07667910307645798, |
|
"logps/chosen": -0.33281245827674866, |
|
"logps/rejected": -0.4943714141845703, |
|
"loss": 1.2277, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.33281245827674866, |
|
"rewards/margins": 0.16155894100666046, |
|
"rewards/rejected": -0.4943714141845703, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 5.764021925950384, |
|
"learning_rate": 2.597300202547034e-07, |
|
"logits/chosen": -0.0671951025724411, |
|
"logits/rejected": -0.09161119163036346, |
|
"logps/chosen": -0.3234054148197174, |
|
"logps/rejected": -0.36444562673568726, |
|
"loss": 1.2404, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.3234054148197174, |
|
"rewards/margins": 0.04104021191596985, |
|
"rewards/rejected": -0.36444562673568726, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 6.254810248978168, |
|
"learning_rate": 2.4864234234060747e-07, |
|
"logits/chosen": -0.1233711987733841, |
|
"logits/rejected": -0.10507211834192276, |
|
"logps/chosen": -0.326472669839859, |
|
"logps/rejected": -0.4079364836215973, |
|
"loss": 1.2346, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.326472669839859, |
|
"rewards/margins": 0.08146381378173828, |
|
"rewards/rejected": -0.4079364836215973, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 8.145566662679744, |
|
"learning_rate": 2.3762649275467223e-07, |
|
"logits/chosen": -0.11552796512842178, |
|
"logits/rejected": -0.12833945453166962, |
|
"logps/chosen": -0.3270297944545746, |
|
"logps/rejected": -0.40440672636032104, |
|
"loss": 1.2498, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3270297944545746, |
|
"rewards/margins": 0.07737687975168228, |
|
"rewards/rejected": -0.40440672636032104, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 9.531079036404222, |
|
"learning_rate": 2.2669787815863174e-07, |
|
"logits/chosen": -0.03034001588821411, |
|
"logits/rejected": -0.0528348907828331, |
|
"logps/chosen": -0.3205064833164215, |
|
"logps/rejected": -0.4226464629173279, |
|
"loss": 1.2523, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3205064833164215, |
|
"rewards/margins": 0.10214000940322876, |
|
"rewards/rejected": -0.4226464629173279, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 5.413246554100629, |
|
"learning_rate": 2.1587178320819919e-07, |
|
"logits/chosen": -0.060756783932447433, |
|
"logits/rejected": -0.0011257051955908537, |
|
"logps/chosen": -0.27187207341194153, |
|
"logps/rejected": -0.3691639006137848, |
|
"loss": 1.2341, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.27187207341194153, |
|
"rewards/margins": 0.09729186445474625, |
|
"rewards/rejected": -0.3691639006137848, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 12.664849670753055, |
|
"learning_rate": 2.0516334917609277e-07, |
|
"logits/chosen": -0.10225675255060196, |
|
"logits/rejected": -0.04843712970614433, |
|
"logps/chosen": -0.3377472758293152, |
|
"logps/rejected": -0.5128234028816223, |
|
"loss": 1.2347, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.3377472758293152, |
|
"rewards/margins": 0.17507611215114594, |
|
"rewards/rejected": -0.5128234028816223, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 7.8017671377878015, |
|
"learning_rate": 1.9458755277559716e-07, |
|
"logits/chosen": -0.11494015157222748, |
|
"logits/rejected": -0.10972355306148529, |
|
"logps/chosen": -0.3163761496543884, |
|
"logps/rejected": -0.4071407914161682, |
|
"loss": 1.2405, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.3163761496543884, |
|
"rewards/margins": 0.09076462686061859, |
|
"rewards/rejected": -0.4071407914161682, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 6.034077165873482, |
|
"learning_rate": 1.8415918521427613e-07, |
|
"logits/chosen": -0.1821509748697281, |
|
"logits/rejected": -0.19082587957382202, |
|
"logps/chosen": -0.31090688705444336, |
|
"logps/rejected": -0.3758618235588074, |
|
"loss": 1.2455, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.31090688705444336, |
|
"rewards/margins": 0.06495492160320282, |
|
"rewards/rejected": -0.3758618235588074, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 10.235181182403219, |
|
"learning_rate": 1.7389283150713038e-07, |
|
"logits/chosen": -0.1251331865787506, |
|
"logits/rejected": -0.11890840530395508, |
|
"logps/chosen": -0.3588525056838989, |
|
"logps/rejected": -0.425645649433136, |
|
"loss": 1.2543, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.3588525056838989, |
|
"rewards/margins": 0.06679315119981766, |
|
"rewards/rejected": -0.425645649433136, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 9.723759702295695, |
|
"learning_rate": 1.6380285007813597e-07, |
|
"logits/chosen": -0.1123957484960556, |
|
"logits/rejected": -0.1257510930299759, |
|
"logps/chosen": -0.3145357668399811, |
|
"logps/rejected": -0.3408251404762268, |
|
"loss": 1.2609, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.3145357668399811, |
|
"rewards/margins": 0.026289362460374832, |
|
"rewards/rejected": -0.3408251404762268, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 7.286383815847668, |
|
"learning_rate": 1.539033526786898e-07, |
|
"logits/chosen": -0.1374741941690445, |
|
"logits/rejected": -0.11429701000452042, |
|
"logps/chosen": -0.32331573963165283, |
|
"logps/rejected": -0.4659709930419922, |
|
"loss": 1.2423, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.32331573963165283, |
|
"rewards/margins": 0.14265525341033936, |
|
"rewards/rejected": -0.4659709930419922, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 5.495961241803029, |
|
"learning_rate": 1.4420818465104924e-07, |
|
"logits/chosen": -0.1799645572900772, |
|
"logits/rejected": -0.17759008705615997, |
|
"logps/chosen": -0.30779215693473816, |
|
"logps/rejected": -0.3636534810066223, |
|
"loss": 1.2328, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.30779215693473816, |
|
"rewards/margins": 0.05586131289601326, |
|
"rewards/rejected": -0.3636534810066223, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 6.732402042819091, |
|
"learning_rate": 1.3473090556436928e-07, |
|
"logits/chosen": -0.09900529682636261, |
|
"logits/rejected": -0.11673985421657562, |
|
"logps/chosen": -0.32374444603919983, |
|
"logps/rejected": -0.42279139161109924, |
|
"loss": 1.2482, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.32374444603919983, |
|
"rewards/margins": 0.0990469679236412, |
|
"rewards/rejected": -0.42279139161109924, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 6.164055292584424, |
|
"learning_rate": 1.2548477025041833e-07, |
|
"logits/chosen": -0.17352089285850525, |
|
"logits/rejected": -0.15723419189453125, |
|
"logps/chosen": -0.31769293546676636, |
|
"logps/rejected": -0.4277707040309906, |
|
"loss": 1.2469, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.31769293546676636, |
|
"rewards/margins": 0.11007778346538544, |
|
"rewards/rejected": -0.4277707040309906, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 7.386683624949419, |
|
"learning_rate": 1.1648271026549805e-07, |
|
"logits/chosen": -0.16501447558403015, |
|
"logits/rejected": -0.17034907639026642, |
|
"logps/chosen": -0.3037567734718323, |
|
"logps/rejected": -0.4147283136844635, |
|
"loss": 1.235, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3037567734718323, |
|
"rewards/margins": 0.11097153276205063, |
|
"rewards/rejected": -0.4147283136844635, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 8.63440141406496, |
|
"learning_rate": 1.0773731580449275e-07, |
|
"logits/chosen": -0.0861009806394577, |
|
"logits/rejected": -0.10058856010437012, |
|
"logps/chosen": -0.3289971947669983, |
|
"logps/rejected": -0.43288707733154297, |
|
"loss": 1.2271, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3289971947669983, |
|
"rewards/margins": 0.10388988256454468, |
|
"rewards/rejected": -0.43288707733154297, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 5.491722459194082, |
|
"learning_rate": 9.926081809234262e-08, |
|
"logits/chosen": -0.1492873877286911, |
|
"logits/rejected": -0.14633427560329437, |
|
"logps/chosen": -0.3535214960575104, |
|
"logps/rejected": -0.5062969923019409, |
|
"loss": 1.2331, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3535214960575104, |
|
"rewards/margins": 0.15277548134326935, |
|
"rewards/rejected": -0.5062969923019409, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 16.598441497777067, |
|
"learning_rate": 9.106507227756998e-08, |
|
"logits/chosen": -0.10592007637023926, |
|
"logits/rejected": -0.1149587631225586, |
|
"logps/chosen": -0.355294406414032, |
|
"logps/rejected": -0.41237178444862366, |
|
"loss": 1.2541, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.355294406414032, |
|
"rewards/margins": 0.057077307254076004, |
|
"rewards/rejected": -0.41237178444862366, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 9.266627411766367, |
|
"learning_rate": 8.316154085178256e-08, |
|
"logits/chosen": -0.1599133014678955, |
|
"logits/rejected": -0.16612327098846436, |
|
"logps/chosen": -0.3537140488624573, |
|
"logps/rejected": -0.45664018392562866, |
|
"loss": 1.2415, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3537140488624573, |
|
"rewards/margins": 0.10292615741491318, |
|
"rewards/rejected": -0.45664018392562866, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 7.428406479509353, |
|
"learning_rate": 7.55612776183419e-08, |
|
"logits/chosen": -0.10595826804637909, |
|
"logits/rejected": -0.09110520780086517, |
|
"logps/chosen": -0.35074084997177124, |
|
"logps/rejected": -0.40927591919898987, |
|
"loss": 1.2357, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.35074084997177124, |
|
"rewards/margins": 0.05853506922721863, |
|
"rewards/rejected": -0.40927591919898987, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 8.282266775931964, |
|
"learning_rate": 6.827491223262017e-08, |
|
"logits/chosen": -0.14613883197307587, |
|
"logits/rejected": -0.1305559277534485, |
|
"logps/chosen": -0.33350640535354614, |
|
"logps/rejected": -0.3941604495048523, |
|
"loss": 1.2317, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.33350640535354614, |
|
"rewards/margins": 0.06065405532717705, |
|
"rewards/rejected": -0.3941604495048523, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 7.257639288893235, |
|
"learning_rate": 6.131263533546572e-08, |
|
"logits/chosen": -0.13168776035308838, |
|
"logits/rejected": -0.13572274148464203, |
|
"logps/chosen": -0.32469362020492554, |
|
"logps/rejected": -0.4485169053077698, |
|
"loss": 1.2319, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.32469362020492554, |
|
"rewards/margins": 0.12382327020168304, |
|
"rewards/rejected": -0.4485169053077698, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 8.794582511790273, |
|
"learning_rate": 5.468418430067059e-08, |
|
"logits/chosen": -0.13690440356731415, |
|
"logits/rejected": -0.11999843269586563, |
|
"logps/chosen": -0.3403404653072357, |
|
"logps/rejected": -0.41022801399230957, |
|
"loss": 1.2458, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.3403404653072357, |
|
"rewards/margins": 0.06988750398159027, |
|
"rewards/rejected": -0.41022801399230957, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 6.914453118608116, |
|
"learning_rate": 4.839882961637282e-08, |
|
"logits/chosen": -0.14087721705436707, |
|
"logits/rejected": -0.11817269027233124, |
|
"logps/chosen": -0.32691115140914917, |
|
"logps/rejected": -0.39570215344429016, |
|
"loss": 1.2453, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.32691115140914917, |
|
"rewards/margins": 0.0687909945845604, |
|
"rewards/rejected": -0.39570215344429016, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 7.401358248565427, |
|
"learning_rate": 4.2465361919440165e-08, |
|
"logits/chosen": -0.17381078004837036, |
|
"logits/rejected": -0.1655048429965973, |
|
"logps/chosen": -0.32772788405418396, |
|
"logps/rejected": -0.36842280626296997, |
|
"loss": 1.2419, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.32772788405418396, |
|
"rewards/margins": 0.040694937109947205, |
|
"rewards/rejected": -0.36842280626296997, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 6.499476994247458, |
|
"learning_rate": 3.6892079700970036e-08, |
|
"logits/chosen": -0.19311991333961487, |
|
"logits/rejected": -0.1830570548772812, |
|
"logps/chosen": -0.34295058250427246, |
|
"logps/rejected": -0.37818074226379395, |
|
"loss": 1.2439, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.34295058250427246, |
|
"rewards/margins": 0.035230137407779694, |
|
"rewards/rejected": -0.37818074226379395, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": 0.01532436441630125, |
|
"eval_logits/rejected": 0.005963262636214495, |
|
"eval_logps/chosen": -0.3365793526172638, |
|
"eval_logps/rejected": -0.40151944756507874, |
|
"eval_loss": 1.2415482997894287, |
|
"eval_rewards/accuracies": 0.5873983502388, |
|
"eval_rewards/chosen": -0.3365793526172638, |
|
"eval_rewards/margins": 0.06494008004665375, |
|
"eval_rewards/rejected": -0.40151944756507874, |
|
"eval_runtime": 427.7978, |
|
"eval_samples_per_second": 4.584, |
|
"eval_steps_per_second": 0.288, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 8.655152423101507, |
|
"learning_rate": 3.1686777700099e-08, |
|
"logits/chosen": -0.11514046043157578, |
|
"logits/rejected": -0.16686634719371796, |
|
"logps/chosen": -0.3493112325668335, |
|
"logps/rejected": -0.37683025002479553, |
|
"loss": 1.245, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.3493112325668335, |
|
"rewards/margins": 0.027519047260284424, |
|
"rewards/rejected": -0.37683025002479553, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 8.013257979238155, |
|
"learning_rate": 2.685673600235524e-08, |
|
"logits/chosen": -0.10536377131938934, |
|
"logits/rejected": -0.1320020854473114, |
|
"logps/chosen": -0.3498873710632324, |
|
"logps/rejected": -0.39043301343917847, |
|
"loss": 1.2607, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.3498873710632324, |
|
"rewards/margins": 0.04054565355181694, |
|
"rewards/rejected": -0.39043301343917847, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 12.61073944196688, |
|
"learning_rate": 2.2408709857800988e-08, |
|
"logits/chosen": -0.12514375150203705, |
|
"logits/rejected": -0.10413704812526703, |
|
"logps/chosen": -0.2854083478450775, |
|
"logps/rejected": -0.3893025517463684, |
|
"loss": 1.233, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.2854083478450775, |
|
"rewards/margins": 0.1038941740989685, |
|
"rewards/rejected": -0.3893025517463684, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 5.802098857336539, |
|
"learning_rate": 1.8348920233204167e-08, |
|
"logits/chosen": -0.08317883312702179, |
|
"logits/rejected": -0.06567595899105072, |
|
"logps/chosen": -0.3321346044540405, |
|
"logps/rejected": -0.4815450608730316, |
|
"loss": 1.2393, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3321346044540405, |
|
"rewards/margins": 0.14941047132015228, |
|
"rewards/rejected": -0.4815450608730316, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 5.211099670695838, |
|
"learning_rate": 1.468304511145394e-08, |
|
"logits/chosen": -0.02274451218545437, |
|
"logits/rejected": -0.0685218870639801, |
|
"logps/chosen": -0.3114772439002991, |
|
"logps/rejected": -0.4261551797389984, |
|
"loss": 1.2356, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3114772439002991, |
|
"rewards/margins": 0.11467792093753815, |
|
"rewards/rejected": -0.4261551797389984, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 5.865346057497481, |
|
"learning_rate": 1.1416211550388222e-08, |
|
"logits/chosen": -0.10939434915781021, |
|
"logits/rejected": -0.09104075282812119, |
|
"logps/chosen": -0.29765018820762634, |
|
"logps/rejected": -0.3629956841468811, |
|
"loss": 1.2343, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.29765018820762634, |
|
"rewards/margins": 0.06534545123577118, |
|
"rewards/rejected": -0.3629956841468811, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 7.151775664454521, |
|
"learning_rate": 8.552988512139748e-09, |
|
"logits/chosen": -0.09702922403812408, |
|
"logits/rejected": -0.11269289255142212, |
|
"logps/chosen": -0.3232804238796234, |
|
"logps/rejected": -0.4417162835597992, |
|
"loss": 1.2366, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3232804238796234, |
|
"rewards/margins": 0.11843589693307877, |
|
"rewards/rejected": -0.4417162835597992, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 19.71341285143898, |
|
"learning_rate": 6.097380473029356e-09, |
|
"logits/chosen": -0.13407650589942932, |
|
"logits/rejected": -0.1464676707983017, |
|
"logps/chosen": -0.33517464995384216, |
|
"logps/rejected": -0.3925517201423645, |
|
"loss": 1.2475, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.33517464995384216, |
|
"rewards/margins": 0.05737708881497383, |
|
"rewards/rejected": -0.3925517201423645, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 7.374118674662329, |
|
"learning_rate": 4.052821822943597e-09, |
|
"logits/chosen": -0.04786144942045212, |
|
"logits/rejected": -0.04718126356601715, |
|
"logps/chosen": -0.3143005967140198, |
|
"logps/rejected": -0.38420677185058594, |
|
"loss": 1.258, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3143005967140198, |
|
"rewards/margins": 0.06990616768598557, |
|
"rewards/rejected": -0.38420677185058594, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 7.332271422792315, |
|
"learning_rate": 2.4221720620301368e-09, |
|
"logits/chosen": -0.10571523010730743, |
|
"logits/rejected": -0.0989978164434433, |
|
"logps/chosen": -0.321834921836853, |
|
"logps/rejected": -0.40633755922317505, |
|
"loss": 1.2275, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.321834921836853, |
|
"rewards/margins": 0.08450265228748322, |
|
"rewards/rejected": -0.40633755922317505, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 7.032752985388992, |
|
"learning_rate": 1.2077118014282794e-09, |
|
"logits/chosen": -0.06323617696762085, |
|
"logits/rejected": -0.029714446514844894, |
|
"logps/chosen": -0.3421580493450165, |
|
"logps/rejected": -0.4160069525241852, |
|
"loss": 1.2498, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3421580493450165, |
|
"rewards/margins": 0.0738489031791687, |
|
"rewards/rejected": -0.4160069525241852, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 6.513587335089515, |
|
"learning_rate": 4.1113957362785e-10, |
|
"logits/chosen": -0.05799049139022827, |
|
"logits/rejected": -0.08265287429094315, |
|
"logps/chosen": -0.3295963406562805, |
|
"logps/rejected": -0.37454092502593994, |
|
"loss": 1.2494, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.3295963406562805, |
|
"rewards/margins": 0.04494457319378853, |
|
"rewards/rejected": -0.37454092502593994, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 9.565280879421035, |
|
"learning_rate": 3.3569456917970085e-11, |
|
"logits/chosen": -0.030880967155098915, |
|
"logits/rejected": -0.05265098810195923, |
|
"logps/chosen": -0.31436887383461, |
|
"logps/rejected": -0.407276451587677, |
|
"loss": 1.2427, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.31436887383461, |
|
"rewards/margins": 0.0929076224565506, |
|
"rewards/rejected": -0.407276451587677, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.2480301234145237, |
|
"train_runtime": 21322.7535, |
|
"train_samples_per_second": 2.808, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|