|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9973828840617638, |
|
"eval_steps": 10000, |
|
"global_step": 954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": 0.17704486846923828, |
|
"logits/rejected": 0.25409135222435, |
|
"logps/chosen": -354.4068603515625, |
|
"logps/rejected": -305.2366638183594, |
|
"loss": 0.1821, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -6.60312725813128e-05, |
|
"rewards/margins": 0.00012125837383791804, |
|
"rewards/rejected": -0.00018728969735093415, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": 0.07091161608695984, |
|
"logits/rejected": 0.1985362321138382, |
|
"logps/chosen": -316.65069580078125, |
|
"logps/rejected": -276.1200866699219, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0008458361262455583, |
|
"rewards/margins": 0.0016920112539082766, |
|
"rewards/rejected": -0.0008461751276627183, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 0.17787829041481018, |
|
"logits/rejected": 0.2488478720188141, |
|
"logps/chosen": -294.9706115722656, |
|
"logps/rejected": -298.59521484375, |
|
"loss": 0.1822, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.700423985719681e-05, |
|
"rewards/margins": 0.0029355171136558056, |
|
"rewards/rejected": -0.0029725211206823587, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": 0.09609868377447128, |
|
"logits/rejected": 0.21795693039894104, |
|
"logps/chosen": -347.44097900390625, |
|
"logps/rejected": -320.9972839355469, |
|
"loss": 0.1877, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0013125470140948892, |
|
"rewards/margins": 0.00661453977227211, |
|
"rewards/rejected": -0.005301993805915117, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": 0.1497882902622223, |
|
"logits/rejected": 0.240590900182724, |
|
"logps/chosen": -311.1229553222656, |
|
"logps/rejected": -286.51702880859375, |
|
"loss": 0.1814, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.005703258328139782, |
|
"rewards/margins": 0.022644545882940292, |
|
"rewards/rejected": -0.02834780514240265, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": 0.13869214057922363, |
|
"logits/rejected": 0.28307411074638367, |
|
"logps/chosen": -295.9754638671875, |
|
"logps/rejected": -281.43798828125, |
|
"loss": 0.1766, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.03096725046634674, |
|
"rewards/margins": 0.028959080576896667, |
|
"rewards/rejected": -0.059926338493824005, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.291666666666666e-07, |
|
"logits/chosen": 0.18460798263549805, |
|
"logits/rejected": 0.2718513607978821, |
|
"logps/chosen": -335.46148681640625, |
|
"logps/rejected": -330.33404541015625, |
|
"loss": 0.174, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.057377688586711884, |
|
"rewards/margins": 0.05648452043533325, |
|
"rewards/rejected": -0.11386220157146454, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": 0.29816848039627075, |
|
"logits/rejected": 0.4011983871459961, |
|
"logps/chosen": -330.4580383300781, |
|
"logps/rejected": -311.96490478515625, |
|
"loss": 0.159, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11794394254684448, |
|
"rewards/margins": 0.13102997839450836, |
|
"rewards/rejected": -0.24897389113903046, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.374999999999999e-07, |
|
"logits/chosen": 0.2283201515674591, |
|
"logits/rejected": 0.37335914373397827, |
|
"logps/chosen": -358.6737365722656, |
|
"logps/rejected": -304.0804138183594, |
|
"loss": 0.1421, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.21732211112976074, |
|
"rewards/margins": 0.15273679792881012, |
|
"rewards/rejected": -0.37005892395973206, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.999463737538052e-07, |
|
"logits/chosen": 0.2938156723976135, |
|
"logits/rejected": 0.46553492546081543, |
|
"logps/chosen": -361.78338623046875, |
|
"logps/rejected": -343.25750732421875, |
|
"loss": 0.1217, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.27221935987472534, |
|
"rewards/margins": 0.23653486371040344, |
|
"rewards/rejected": -0.5087541937828064, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.993432105822034e-07, |
|
"logits/chosen": 0.31155580282211304, |
|
"logits/rejected": 0.3508353531360626, |
|
"logps/chosen": -353.184814453125, |
|
"logps/rejected": -366.32720947265625, |
|
"loss": 0.106, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.40565404295921326, |
|
"rewards/margins": 0.2631165683269501, |
|
"rewards/rejected": -0.6687706708908081, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.980706626858607e-07, |
|
"logits/chosen": 0.26659709215164185, |
|
"logits/rejected": 0.3288796842098236, |
|
"logps/chosen": -374.50274658203125, |
|
"logps/rejected": -403.8424377441406, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5394914150238037, |
|
"rewards/margins": 0.28696924448013306, |
|
"rewards/rejected": -0.8264607191085815, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.961304359538434e-07, |
|
"logits/chosen": 0.1616436094045639, |
|
"logits/rejected": 0.2970871031284332, |
|
"logps/chosen": -396.555419921875, |
|
"logps/rejected": -362.3848876953125, |
|
"loss": 0.0934, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5805934071540833, |
|
"rewards/margins": 0.19475166499614716, |
|
"rewards/rejected": -0.775344967842102, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.935251313189563e-07, |
|
"logits/chosen": 0.1485656201839447, |
|
"logits/rejected": 0.2714545428752899, |
|
"logps/chosen": -384.0659484863281, |
|
"logps/rejected": -346.6048278808594, |
|
"loss": 0.0933, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5202253460884094, |
|
"rewards/margins": 0.24675369262695312, |
|
"rewards/rejected": -0.766978919506073, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.902582412711118e-07, |
|
"logits/chosen": 0.12988325953483582, |
|
"logits/rejected": 0.1523539423942566, |
|
"logps/chosen": -379.16839599609375, |
|
"logps/rejected": -395.9466552734375, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4386775493621826, |
|
"rewards/margins": 0.37129276990890503, |
|
"rewards/rejected": -0.8099702596664429, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.86334145175542e-07, |
|
"logits/chosen": 0.06655962765216827, |
|
"logits/rejected": 0.09024105966091156, |
|
"logps/chosen": -341.7105407714844, |
|
"logps/rejected": -360.19805908203125, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3964901566505432, |
|
"rewards/margins": 0.3985019028186798, |
|
"rewards/rejected": -0.7949920892715454, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.817581034021272e-07, |
|
"logits/chosen": 0.16973164677619934, |
|
"logits/rejected": 0.21836213767528534, |
|
"logps/chosen": -398.22369384765625, |
|
"logps/rejected": -417.8206481933594, |
|
"loss": 0.081, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6185532808303833, |
|
"rewards/margins": 0.4811604917049408, |
|
"rewards/rejected": -1.0997138023376465, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.765362502737097e-07, |
|
"logits/chosen": 0.09212584793567657, |
|
"logits/rejected": 0.23974208533763885, |
|
"logps/chosen": -388.64910888671875, |
|
"logps/rejected": -411.5782775878906, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6261709928512573, |
|
"rewards/margins": 0.4908596873283386, |
|
"rewards/rejected": -1.1170307397842407, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.706755858428485e-07, |
|
"logits/chosen": 0.1811675727367401, |
|
"logits/rejected": 0.27236208319664, |
|
"logps/chosen": -419.11376953125, |
|
"logps/rejected": -437.33843994140625, |
|
"loss": 0.0681, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8445426225662231, |
|
"rewards/margins": 0.4015916883945465, |
|
"rewards/rejected": -1.2461342811584473, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.641839665080363e-07, |
|
"logits/chosen": 0.14256766438484192, |
|
"logits/rejected": 0.2711044251918793, |
|
"logps/chosen": -414.55975341796875, |
|
"logps/rejected": -416.9037170410156, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7406997680664062, |
|
"rewards/margins": 0.48706990480422974, |
|
"rewards/rejected": -1.2277696132659912, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.570700944819582e-07, |
|
"logits/chosen": 0.23208096623420715, |
|
"logits/rejected": 0.35697174072265625, |
|
"logps/chosen": -382.19970703125, |
|
"logps/rejected": -386.50701904296875, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6804240942001343, |
|
"rewards/margins": 0.48590850830078125, |
|
"rewards/rejected": -1.166332721710205, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.493435061259129e-07, |
|
"logits/chosen": 0.13639363646507263, |
|
"logits/rejected": 0.23731064796447754, |
|
"logps/chosen": -382.42022705078125, |
|
"logps/rejected": -369.6554870605469, |
|
"loss": 0.0763, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6574115753173828, |
|
"rewards/margins": 0.40243881940841675, |
|
"rewards/rejected": -1.0598504543304443, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.4101455916603e-07, |
|
"logits/chosen": 0.1799091249704361, |
|
"logits/rejected": 0.2304597645998001, |
|
"logps/chosen": -416.672607421875, |
|
"logps/rejected": -420.39862060546875, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9061130285263062, |
|
"rewards/margins": 0.46666598320007324, |
|
"rewards/rejected": -1.3727790117263794, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.320944188084241e-07, |
|
"logits/chosen": 0.08318189531564713, |
|
"logits/rejected": 0.13486048579216003, |
|
"logps/chosen": -408.77545166015625, |
|
"logps/rejected": -427.9566345214844, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.968237042427063, |
|
"rewards/margins": 0.2922549843788147, |
|
"rewards/rejected": -1.260491967201233, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.225950427718974e-07, |
|
"logits/chosen": 0.051157813519239426, |
|
"logits/rejected": 0.1319509893655777, |
|
"logps/chosen": -385.2474670410156, |
|
"logps/rejected": -402.11126708984375, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7319932579994202, |
|
"rewards/margins": 0.468679815530777, |
|
"rewards/rejected": -1.2006731033325195, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.125291652582547e-07, |
|
"logits/chosen": 0.013853952288627625, |
|
"logits/rejected": 0.10071275383234024, |
|
"logps/chosen": -445.53607177734375, |
|
"logps/rejected": -434.2711486816406, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9089228510856628, |
|
"rewards/margins": 0.4331666827201843, |
|
"rewards/rejected": -1.3420894145965576, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.019102798817195e-07, |
|
"logits/chosen": 0.1297096163034439, |
|
"logits/rejected": 0.1613592505455017, |
|
"logps/chosen": -403.47393798828125, |
|
"logps/rejected": -446.1951599121094, |
|
"loss": 0.0685, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7434005737304688, |
|
"rewards/margins": 0.6140644550323486, |
|
"rewards/rejected": -1.357465147972107, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.90752621580335e-07, |
|
"logits/chosen": 0.16231071949005127, |
|
"logits/rejected": 0.1873283088207245, |
|
"logps/chosen": -362.4006652832031, |
|
"logps/rejected": -398.279296875, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6591774225234985, |
|
"rewards/margins": 0.41294485330581665, |
|
"rewards/rejected": -1.07212233543396, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.79071147533597e-07, |
|
"logits/chosen": 0.14204099774360657, |
|
"logits/rejected": 0.20997166633605957, |
|
"logps/chosen": -424.5856018066406, |
|
"logps/rejected": -456.9698181152344, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7815448045730591, |
|
"rewards/margins": 0.5602203011512756, |
|
"rewards/rejected": -1.34176504611969, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 8.668815171119019e-07, |
|
"logits/chosen": 0.2026984989643097, |
|
"logits/rejected": 0.23374077677726746, |
|
"logps/chosen": -380.8060607910156, |
|
"logps/rejected": -468.7802734375, |
|
"loss": 0.0554, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8905105590820312, |
|
"rewards/margins": 0.5638677477836609, |
|
"rewards/rejected": -1.454378366470337, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.54200070884685e-07, |
|
"logits/chosen": 0.23336808383464813, |
|
"logits/rejected": 0.25176650285720825, |
|
"logps/chosen": -385.24676513671875, |
|
"logps/rejected": -462.87322998046875, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8951492309570312, |
|
"rewards/margins": 0.6165014505386353, |
|
"rewards/rejected": -1.5116506814956665, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 8.410438087153911e-07, |
|
"logits/chosen": 0.22913236916065216, |
|
"logits/rejected": 0.3360585570335388, |
|
"logps/chosen": -383.767578125, |
|
"logps/rejected": -424.25067138671875, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6813658475875854, |
|
"rewards/margins": 0.6591276526451111, |
|
"rewards/rejected": -1.3404934406280518, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.274303669726426e-07, |
|
"logits/chosen": 0.22990348935127258, |
|
"logits/rejected": 0.3006184697151184, |
|
"logps/chosen": -366.43499755859375, |
|
"logps/rejected": -444.06536865234375, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6766657829284668, |
|
"rewards/margins": 0.6564770936965942, |
|
"rewards/rejected": -1.333142876625061, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.133779948881513e-07, |
|
"logits/chosen": 0.22257550060749054, |
|
"logits/rejected": 0.3241097033023834, |
|
"logps/chosen": -360.141845703125, |
|
"logps/rejected": -405.85711669921875, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.7344536781311035, |
|
"rewards/margins": 0.7157880067825317, |
|
"rewards/rejected": -1.4502416849136353, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.989055300930704e-07, |
|
"logits/chosen": 0.1499968320131302, |
|
"logits/rejected": 0.15372925996780396, |
|
"logps/chosen": -388.67559814453125, |
|
"logps/rejected": -462.0445251464844, |
|
"loss": 0.0644, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8717344403266907, |
|
"rewards/margins": 0.6429644227027893, |
|
"rewards/rejected": -1.51469886302948, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.840323733655778e-07, |
|
"logits/chosen": 0.08885981142520905, |
|
"logits/rejected": 0.19541098177433014, |
|
"logps/chosen": -407.87286376953125, |
|
"logps/rejected": -420.4515686035156, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.797155499458313, |
|
"rewards/margins": 0.5855330228805542, |
|
"rewards/rejected": -1.3826884031295776, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.687784626235447e-07, |
|
"logits/chosen": 0.05912008136510849, |
|
"logits/rejected": 0.17702099680900574, |
|
"logps/chosen": -428.82354736328125, |
|
"logps/rejected": -466.0895080566406, |
|
"loss": 0.0599, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.803920567035675, |
|
"rewards/margins": 0.7507921457290649, |
|
"rewards/rejected": -1.5547125339508057, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.531642461971514e-07, |
|
"logits/chosen": 0.11388075351715088, |
|
"logits/rejected": 0.1931450068950653, |
|
"logps/chosen": -388.9282531738281, |
|
"logps/rejected": -427.1614685058594, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9585503339767456, |
|
"rewards/margins": 0.5912213325500488, |
|
"rewards/rejected": -1.5497716665267944, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.372106554172801e-07, |
|
"logits/chosen": -0.049389470368623734, |
|
"logits/rejected": 0.10218650102615356, |
|
"logps/chosen": -443.7737731933594, |
|
"logps/rejected": -484.5735778808594, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0208237171173096, |
|
"rewards/margins": 0.8150562047958374, |
|
"rewards/rejected": -1.835879921913147, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.209390765564318e-07, |
|
"logits/chosen": 0.07526848465204239, |
|
"logits/rejected": 0.1457681804895401, |
|
"logps/chosen": -430.77130126953125, |
|
"logps/rejected": -478.53118896484375, |
|
"loss": 0.0488, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.137662410736084, |
|
"rewards/margins": 0.6997725963592529, |
|
"rewards/rejected": -1.837435007095337, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.043713221597773e-07, |
|
"logits/chosen": -0.014962440356612206, |
|
"logits/rejected": 0.049673158675432205, |
|
"logps/chosen": -394.35980224609375, |
|
"logps/rejected": -455.79168701171875, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.0516221523284912, |
|
"rewards/margins": 0.6002627015113831, |
|
"rewards/rejected": -1.65188467502594, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.875296018047809e-07, |
|
"logits/chosen": 0.1113734096288681, |
|
"logits/rejected": 0.17297616600990295, |
|
"logps/chosen": -371.1769104003906, |
|
"logps/rejected": -433.82763671875, |
|
"loss": 0.057, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7784308791160583, |
|
"rewards/margins": 0.7032991647720337, |
|
"rewards/rejected": -1.4817302227020264, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.704364923285857e-07, |
|
"logits/chosen": 0.08021976053714752, |
|
"logits/rejected": 0.09611347317695618, |
|
"logps/chosen": -433.26898193359375, |
|
"logps/rejected": -482.2544860839844, |
|
"loss": 0.0623, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9919212460517883, |
|
"rewards/margins": 0.5928072333335876, |
|
"rewards/rejected": -1.584728479385376, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.531149075630796e-07, |
|
"logits/chosen": 0.06492827087640762, |
|
"logits/rejected": 0.09372309595346451, |
|
"logps/chosen": -369.0657958984375, |
|
"logps/rejected": -427.1637268066406, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8450859785079956, |
|
"rewards/margins": 0.6487796902656555, |
|
"rewards/rejected": -1.4938656091690063, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.355880676182085e-07, |
|
"logits/chosen": 0.015085640363395214, |
|
"logits/rejected": 0.1697283238172531, |
|
"logps/chosen": -454.42071533203125, |
|
"logps/rejected": -461.6656799316406, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0446925163269043, |
|
"rewards/margins": 0.7324589490890503, |
|
"rewards/rejected": -1.7771514654159546, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.178794677547137e-07, |
|
"logits/chosen": 0.052903078496456146, |
|
"logits/rejected": 0.21909013390541077, |
|
"logps/chosen": -389.771728515625, |
|
"logps/rejected": -432.63311767578125, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.918341338634491, |
|
"rewards/margins": 0.7504295706748962, |
|
"rewards/rejected": -1.6687707901000977, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.000128468880222e-07, |
|
"logits/chosen": 0.0020152360666543245, |
|
"logits/rejected": 0.10528425872325897, |
|
"logps/chosen": -439.73016357421875, |
|
"logps/rejected": -486.3055114746094, |
|
"loss": 0.0531, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0058103799819946, |
|
"rewards/margins": 0.8824182748794556, |
|
"rewards/rejected": -1.8882286548614502, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.820121557655108e-07, |
|
"logits/chosen": 0.03267590329051018, |
|
"logits/rejected": 0.10403893887996674, |
|
"logps/chosen": -426.3312072753906, |
|
"logps/rejected": -521.575439453125, |
|
"loss": 0.0497, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.897496223449707, |
|
"rewards/margins": 1.0473217964172363, |
|
"rewards/rejected": -1.9448179006576538, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.639015248598023e-07, |
|
"logits/chosen": -0.05066138505935669, |
|
"logits/rejected": 0.0016520231729373336, |
|
"logps/chosen": -459.2066955566406, |
|
"logps/rejected": -572.3805541992188, |
|
"loss": 0.0254, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.404326319694519, |
|
"rewards/margins": 1.2682745456695557, |
|
"rewards/rejected": -2.6726012229919434, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5.457052320211339e-07, |
|
"logits/chosen": 0.10663177818059921, |
|
"logits/rejected": 0.143524631857872, |
|
"logps/chosen": -454.5547790527344, |
|
"logps/rejected": -574.3235473632812, |
|
"loss": 0.0198, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.592284083366394, |
|
"rewards/margins": 1.2184875011444092, |
|
"rewards/rejected": -2.8107717037200928, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.274476699321637e-07, |
|
"logits/chosen": -0.019788045436143875, |
|
"logits/rejected": 0.12656378746032715, |
|
"logps/chosen": -488.24627685546875, |
|
"logps/rejected": -596.00537109375, |
|
"loss": 0.015, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8213142156600952, |
|
"rewards/margins": 1.3653538227081299, |
|
"rewards/rejected": -3.1866683959960938, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5.091533134088387e-07, |
|
"logits/chosen": -0.0814504474401474, |
|
"logits/rejected": 0.05524957925081253, |
|
"logps/chosen": -552.7730712890625, |
|
"logps/rejected": -634.5548095703125, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0995850563049316, |
|
"rewards/margins": 1.1655638217926025, |
|
"rewards/rejected": -3.2651493549346924, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.908466865911614e-07, |
|
"logits/chosen": 0.03363295644521713, |
|
"logits/rejected": 0.043015364557504654, |
|
"logps/chosen": -468.89593505859375, |
|
"logps/rejected": -560.2864990234375, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.5512639284133911, |
|
"rewards/margins": 1.2513355016708374, |
|
"rewards/rejected": -2.8025994300842285, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.7255233006783624e-07, |
|
"logits/chosen": -0.03754299506545067, |
|
"logits/rejected": 0.08725563436746597, |
|
"logps/chosen": -456.68243408203125, |
|
"logps/rejected": -549.9105224609375, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.657478928565979, |
|
"rewards/margins": 1.0530353784561157, |
|
"rewards/rejected": -2.7105140686035156, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.5429476797886617e-07, |
|
"logits/chosen": 0.0340617299079895, |
|
"logits/rejected": 0.1264275759458542, |
|
"logps/chosen": -469.5687561035156, |
|
"logps/rejected": -592.4705810546875, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.408406138420105, |
|
"rewards/margins": 1.4667712450027466, |
|
"rewards/rejected": -2.8751769065856934, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.3609847514019763e-07, |
|
"logits/chosen": 0.0167356226593256, |
|
"logits/rejected": 0.032135289162397385, |
|
"logps/chosen": -480.41278076171875, |
|
"logps/rejected": -577.2174072265625, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.5578255653381348, |
|
"rewards/margins": 1.0947318077087402, |
|
"rewards/rejected": -2.652557611465454, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.179878442344892e-07, |
|
"logits/chosen": 0.10041844844818115, |
|
"logits/rejected": 0.16732005774974823, |
|
"logps/chosen": -453.9161071777344, |
|
"logps/rejected": -615.6796875, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7070415019989014, |
|
"rewards/margins": 1.4755295515060425, |
|
"rewards/rejected": -3.1825711727142334, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.9998715311197783e-07, |
|
"logits/chosen": 0.1310591995716095, |
|
"logits/rejected": 0.20585906505584717, |
|
"logps/chosen": -493.8118591308594, |
|
"logps/rejected": -631.4963989257812, |
|
"loss": 0.015, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.7850983142852783, |
|
"rewards/margins": 1.443263292312622, |
|
"rewards/rejected": -3.228361129760742, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.821205322452863e-07, |
|
"logits/chosen": 0.22954685986042023, |
|
"logits/rejected": 0.2483092099428177, |
|
"logps/chosen": -473.4378967285156, |
|
"logps/rejected": -605.134033203125, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.700280785560608, |
|
"rewards/margins": 1.460669755935669, |
|
"rewards/rejected": -3.1609506607055664, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.6441193238179146e-07, |
|
"logits/chosen": 0.13607949018478394, |
|
"logits/rejected": 0.1680508852005005, |
|
"logps/chosen": -451.55340576171875, |
|
"logps/rejected": -627.7686157226562, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6148862838745117, |
|
"rewards/margins": 1.678989052772522, |
|
"rewards/rejected": -3.2938759326934814, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.4688509243692034e-07, |
|
"logits/chosen": 0.04345204681158066, |
|
"logits/rejected": 0.13040025532245636, |
|
"logps/chosen": -461.54095458984375, |
|
"logps/rejected": -684.9581909179688, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6480602025985718, |
|
"rewards/margins": 1.6946277618408203, |
|
"rewards/rejected": -3.3426880836486816, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.295635076714144e-07, |
|
"logits/chosen": 0.18233785033226013, |
|
"logits/rejected": 0.19972297549247742, |
|
"logps/chosen": -408.9209899902344, |
|
"logps/rejected": -547.9658813476562, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6356074810028076, |
|
"rewards/margins": 1.3703811168670654, |
|
"rewards/rejected": -3.005988597869873, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.12470398195219e-07, |
|
"logits/chosen": 0.15017299354076385, |
|
"logits/rejected": 0.07167269289493561, |
|
"logps/chosen": -474.58172607421875, |
|
"logps/rejected": -649.4796142578125, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6831333637237549, |
|
"rewards/margins": 1.4837870597839355, |
|
"rewards/rejected": -3.1669201850891113, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.956286778402226e-07, |
|
"logits/chosen": 0.03866753727197647, |
|
"logits/rejected": 0.20129835605621338, |
|
"logps/chosen": -546.3468017578125, |
|
"logps/rejected": -608.462646484375, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7091865539550781, |
|
"rewards/margins": 1.3178246021270752, |
|
"rewards/rejected": -3.0270111560821533, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7906092344356826e-07, |
|
"logits/chosen": 0.2127591073513031, |
|
"logits/rejected": 0.24179625511169434, |
|
"logps/chosen": -462.47412109375, |
|
"logps/rejected": -581.084228515625, |
|
"loss": 0.014, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.751960039138794, |
|
"rewards/margins": 1.4448457956314087, |
|
"rewards/rejected": -3.196805953979492, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6278934458271996e-07, |
|
"logits/chosen": 0.09269841015338898, |
|
"logits/rejected": 0.2964209318161011, |
|
"logps/chosen": -479.434326171875, |
|
"logps/rejected": -605.9524536132812, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8022867441177368, |
|
"rewards/margins": 1.3753817081451416, |
|
"rewards/rejected": -3.177668333053589, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.468357538028487e-07, |
|
"logits/chosen": 0.16141146421432495, |
|
"logits/rejected": 0.18542757630348206, |
|
"logps/chosen": -487.90277099609375, |
|
"logps/rejected": -652.5034790039062, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9332258701324463, |
|
"rewards/margins": 1.736053705215454, |
|
"rewards/rejected": -3.6692795753479004, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.312215373764551e-07, |
|
"logits/chosen": 0.07799498736858368, |
|
"logits/rejected": 0.17718131840229034, |
|
"logps/chosen": -603.2567138671875, |
|
"logps/rejected": -699.2156372070312, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1482930183410645, |
|
"rewards/margins": 1.3787685632705688, |
|
"rewards/rejected": -3.5270614624023438, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.1596762663442213e-07, |
|
"logits/chosen": 0.2014874666929245, |
|
"logits/rejected": 0.3246391713619232, |
|
"logps/chosen": -489.08349609375, |
|
"logps/rejected": -607.5847778320312, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.083740711212158, |
|
"rewards/margins": 1.446257472038269, |
|
"rewards/rejected": -3.5299980640411377, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.0109446990692963e-07, |
|
"logits/chosen": 0.09734896570444107, |
|
"logits/rejected": 0.16283641755580902, |
|
"logps/chosen": -540.1688232421875, |
|
"logps/rejected": -701.462890625, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.07643985748291, |
|
"rewards/margins": 1.7090556621551514, |
|
"rewards/rejected": -3.7854957580566406, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.8662200511184872e-07, |
|
"logits/chosen": 0.07912759482860565, |
|
"logits/rejected": 0.19963078200817108, |
|
"logps/chosen": -491.30426025390625, |
|
"logps/rejected": -630.0563354492188, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9977525472640991, |
|
"rewards/margins": 1.5802443027496338, |
|
"rewards/rejected": -3.5779967308044434, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.725696330273575e-07, |
|
"logits/chosen": 0.14783975481987, |
|
"logits/rejected": 0.27563345432281494, |
|
"logps/chosen": -530.8796997070312, |
|
"logps/rejected": -640.3440551757812, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.036653518676758, |
|
"rewards/margins": 1.323557734489441, |
|
"rewards/rejected": -3.3602116107940674, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.589561912846089e-07, |
|
"logits/chosen": 0.16717246174812317, |
|
"logits/rejected": 0.2920343279838562, |
|
"logps/chosen": -499.3802795410156, |
|
"logps/rejected": -612.64892578125, |
|
"loss": 0.012, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0618550777435303, |
|
"rewards/margins": 1.435462236404419, |
|
"rewards/rejected": -3.4973175525665283, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4579992911531496e-07, |
|
"logits/chosen": 0.1249130517244339, |
|
"logits/rejected": 0.23616066575050354, |
|
"logps/chosen": -575.0750732421875, |
|
"logps/rejected": -649.9669189453125, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2815146446228027, |
|
"rewards/margins": 1.226216197013855, |
|
"rewards/rejected": -3.5077309608459473, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.3311848288809813e-07, |
|
"logits/chosen": 0.21837782859802246, |
|
"logits/rejected": 0.31546956300735474, |
|
"logps/chosen": -510.7059020996094, |
|
"logps/rejected": -609.2933959960938, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.790372610092163, |
|
"rewards/margins": 1.2426694631576538, |
|
"rewards/rejected": -3.0330421924591064, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.209288524664029e-07, |
|
"logits/chosen": 0.14562873542308807, |
|
"logits/rejected": 0.3084864318370819, |
|
"logps/chosen": -622.6912841796875, |
|
"logps/rejected": -749.8731689453125, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.2252538204193115, |
|
"rewards/margins": 1.5818650722503662, |
|
"rewards/rejected": -3.8071188926696777, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.0924737841966497e-07, |
|
"logits/chosen": 0.1799144446849823, |
|
"logits/rejected": 0.354133278131485, |
|
"logps/chosen": -585.0472412109375, |
|
"logps/rejected": -712.3133544921875, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1570990085601807, |
|
"rewards/margins": 1.6586040258407593, |
|
"rewards/rejected": -3.8157036304473877, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.808972011828054e-08, |
|
"logits/chosen": 0.20896565914154053, |
|
"logits/rejected": 0.1832619458436966, |
|
"logps/chosen": -474.9366149902344, |
|
"logps/rejected": -665.3892822265625, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9308887720108032, |
|
"rewards/margins": 1.5281493663787842, |
|
"rewards/rejected": -3.459038257598877, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.747083474174527e-08, |
|
"logits/chosen": 0.25221484899520874, |
|
"logits/rejected": 0.3025228679180145, |
|
"logps/chosen": -486.76678466796875, |
|
"logps/rejected": -610.9810791015625, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9139289855957031, |
|
"rewards/margins": 1.4173685312271118, |
|
"rewards/rejected": -3.3312973976135254, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 7.740495722810269e-08, |
|
"logits/chosen": 0.12703558802604675, |
|
"logits/rejected": 0.25433093309402466, |
|
"logps/chosen": -528.8013916015625, |
|
"logps/rejected": -645.4374389648438, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.129984140396118, |
|
"rewards/margins": 1.322923183441162, |
|
"rewards/rejected": -3.452907085418701, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 6.790558119157597e-08, |
|
"logits/chosen": 0.1941952407360077, |
|
"logits/rejected": 0.36538344621658325, |
|
"logps/chosen": -536.0458374023438, |
|
"logps/rejected": -630.6697387695312, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9618316888809204, |
|
"rewards/margins": 1.3840124607086182, |
|
"rewards/rejected": -3.34584379196167, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5.898544083397e-08, |
|
"logits/chosen": 0.1936766654253006, |
|
"logits/rejected": 0.22626741230487823, |
|
"logps/chosen": -482.18902587890625, |
|
"logps/rejected": -640.9258422851562, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.822951078414917, |
|
"rewards/margins": 1.679091215133667, |
|
"rewards/rejected": -3.502042055130005, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5.065649387408705e-08, |
|
"logits/chosen": 0.16037659347057343, |
|
"logits/rejected": 0.23867423832416534, |
|
"logps/chosen": -536.796630859375, |
|
"logps/rejected": -645.6795654296875, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.09273624420166, |
|
"rewards/margins": 1.3475998640060425, |
|
"rewards/rejected": -3.440336227416992, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.292990551804171e-08, |
|
"logits/chosen": 0.11955185234546661, |
|
"logits/rejected": 0.2987907826900482, |
|
"logps/chosen": -521.8675537109375, |
|
"logps/rejected": -622.3560791015625, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9727070331573486, |
|
"rewards/margins": 1.207002878189087, |
|
"rewards/rejected": -3.1797099113464355, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.581603349196371e-08, |
|
"logits/chosen": 0.12183141708374023, |
|
"logits/rejected": 0.24950018525123596, |
|
"logps/chosen": -529.2427978515625, |
|
"logps/rejected": -662.9299926757812, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.024509906768799, |
|
"rewards/margins": 1.5907318592071533, |
|
"rewards/rejected": -3.615241289138794, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.9324414157151367e-08, |
|
"logits/chosen": 0.11247365176677704, |
|
"logits/rejected": 0.28803473711013794, |
|
"logps/chosen": -538.6015625, |
|
"logps/rejected": -616.6097412109375, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.005286693572998, |
|
"rewards/margins": 1.320533037185669, |
|
"rewards/rejected": -3.325819492340088, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.3463749726290284e-08, |
|
"logits/chosen": 0.09726160764694214, |
|
"logits/rejected": 0.3085189759731293, |
|
"logps/chosen": -527.7420043945312, |
|
"logps/rejected": -666.7064208984375, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.963595986366272, |
|
"rewards/margins": 1.6061077117919922, |
|
"rewards/rejected": -3.5697035789489746, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.824189659787284e-08, |
|
"logits/chosen": 0.19652321934700012, |
|
"logits/rejected": 0.2885872423648834, |
|
"logps/chosen": -515.560546875, |
|
"logps/rejected": -641.10791015625, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9605176448822021, |
|
"rewards/margins": 1.3721264600753784, |
|
"rewards/rejected": -3.33264422416687, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.3665854824458035e-08, |
|
"logits/chosen": 0.16733339428901672, |
|
"logits/rejected": 0.3634529113769531, |
|
"logps/chosen": -542.18505859375, |
|
"logps/rejected": -629.7310791015625, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.0391831398010254, |
|
"rewards/margins": 1.1835925579071045, |
|
"rewards/rejected": -3.2227752208709717, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.741758728888217e-09, |
|
"logits/chosen": 0.08950433880090714, |
|
"logits/rejected": 0.2665843069553375, |
|
"logps/chosen": -533.1641845703125, |
|
"logps/rejected": -621.0523681640625, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9605424404144287, |
|
"rewards/margins": 1.1125773191452026, |
|
"rewards/rejected": -3.073119640350342, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.474868681043577e-09, |
|
"logits/chosen": 0.13345034420490265, |
|
"logits/rejected": 0.2458508014678955, |
|
"logps/chosen": -523.0572509765625, |
|
"logps/rejected": -666.5548706054688, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.094968557357788, |
|
"rewards/margins": 1.4136923551559448, |
|
"rewards/rejected": -3.5086607933044434, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.869564046156459e-09, |
|
"logits/chosen": 0.17636564373970032, |
|
"logits/rejected": 0.24904970824718475, |
|
"logps/chosen": -521.7586669921875, |
|
"logps/rejected": -661.547119140625, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0953400135040283, |
|
"rewards/margins": 1.3953152894973755, |
|
"rewards/rejected": -3.4906551837921143, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.929337314139412e-09, |
|
"logits/chosen": 0.1708141714334488, |
|
"logits/rejected": 0.2874212861061096, |
|
"logps/chosen": -481.3929138183594, |
|
"logps/rejected": -591.492431640625, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8482071161270142, |
|
"rewards/margins": 1.3176212310791016, |
|
"rewards/rejected": -3.165828227996826, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.567894177967325e-10, |
|
"logits/chosen": 0.1810809224843979, |
|
"logits/rejected": 0.3499010503292084, |
|
"logps/chosen": -509.21966552734375, |
|
"logps/rejected": -619.0591430664062, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7878868579864502, |
|
"rewards/margins": 1.3797376155853271, |
|
"rewards/rejected": -3.1676242351531982, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.3626246194704575e-11, |
|
"logits/chosen": 0.12432925403118134, |
|
"logits/rejected": 0.1847553700208664, |
|
"logps/chosen": -471.4737854003906, |
|
"logps/rejected": -620.7115478515625, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8229620456695557, |
|
"rewards/margins": 1.5415856838226318, |
|
"rewards/rejected": -3.3645477294921875, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 954, |
|
"total_flos": 0.0, |
|
"train_loss": 0.050850671487596796, |
|
"train_runtime": 12712.7589, |
|
"train_samples_per_second": 9.618, |
|
"train_steps_per_second": 0.075 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 954, |
|
"num_train_epochs": 2, |
|
"save_steps": 10000, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|