|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982003599280144, |
|
"eval_steps": 500, |
|
"global_step": 416, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.1904761904761906e-07, |
|
"logits/chosen": 0.06842132657766342, |
|
"logits/rejected": 0.05148967728018761, |
|
"logps/chosen": -254.1962432861328, |
|
"logps/rejected": -268.0105285644531, |
|
"loss": 0.3778, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1904761904761906e-06, |
|
"logits/chosen": 0.14202657341957092, |
|
"logits/rejected": 0.2216137945652008, |
|
"logps/chosen": -380.2115478515625, |
|
"logps/rejected": -331.30743408203125, |
|
"loss": 0.3702, |
|
"rewards/accuracies": 0.3819444477558136, |
|
"rewards/chosen": 5.156885163160041e-05, |
|
"rewards/margins": -8.286008232971653e-05, |
|
"rewards/rejected": 0.00013442893396131694, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": 0.11058167368173599, |
|
"logits/rejected": 0.1396401971578598, |
|
"logps/chosen": -336.24676513671875, |
|
"logps/rejected": -305.8110656738281, |
|
"loss": 0.3688, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 2.6639914722181857e-05, |
|
"rewards/margins": 8.525094017386436e-07, |
|
"rewards/rejected": 2.578740895842202e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"logits/chosen": 0.12251333147287369, |
|
"logits/rejected": 0.22839057445526123, |
|
"logps/chosen": -353.02825927734375, |
|
"logps/rejected": -329.4814147949219, |
|
"loss": 0.3833, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.00021826496231369674, |
|
"rewards/margins": 0.0004608921299222857, |
|
"rewards/rejected": -0.00024262710940092802, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": 0.10691192001104355, |
|
"logits/rejected": 0.16547340154647827, |
|
"logps/chosen": -330.2546691894531, |
|
"logps/rejected": -321.59222412109375, |
|
"loss": 0.3676, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.000455420755315572, |
|
"rewards/margins": 0.0009521494503132999, |
|
"rewards/rejected": -0.0004967286949977279, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.994357350311441e-06, |
|
"logits/chosen": 0.11281980574131012, |
|
"logits/rejected": 0.1559869647026062, |
|
"logps/chosen": -333.9978942871094, |
|
"logps/rejected": -305.3361511230469, |
|
"loss": 0.3648, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0016132129821926355, |
|
"rewards/margins": 0.002727704355493188, |
|
"rewards/rejected": -0.0011144911404699087, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97147773390341e-06, |
|
"logits/chosen": 0.1588975489139557, |
|
"logits/rejected": 0.22720813751220703, |
|
"logps/chosen": -328.2370300292969, |
|
"logps/rejected": -326.60650634765625, |
|
"loss": 0.3615, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.002356642857193947, |
|
"rewards/margins": 0.006414002738893032, |
|
"rewards/rejected": -0.004057359881699085, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931169703639282e-06, |
|
"logits/chosen": 0.12001170963048935, |
|
"logits/rejected": 0.2352440357208252, |
|
"logps/chosen": -363.2486267089844, |
|
"logps/rejected": -353.33795166015625, |
|
"loss": 0.3571, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.001306799822486937, |
|
"rewards/margins": 0.013176659122109413, |
|
"rewards/rejected": -0.011869858019053936, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.873717504456219e-06, |
|
"logits/chosen": 0.12399880588054657, |
|
"logits/rejected": 0.13322117924690247, |
|
"logps/chosen": -351.8349304199219, |
|
"logps/rejected": -346.0111999511719, |
|
"loss": 0.3553, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.01495267916470766, |
|
"rewards/margins": 0.018836025148630142, |
|
"rewards/rejected": -0.03378870338201523, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7995262788689865e-06, |
|
"logits/chosen": 0.15232697129249573, |
|
"logits/rejected": 0.16080796718597412, |
|
"logps/chosen": -415.25408935546875, |
|
"logps/rejected": -472.15838623046875, |
|
"loss": 0.3288, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.060072846710681915, |
|
"rewards/margins": 0.05840995907783508, |
|
"rewards/rejected": -0.118482805788517, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.709119209978242e-06, |
|
"logits/chosen": 0.13195478916168213, |
|
"logits/rejected": 0.1627262532711029, |
|
"logps/chosen": -528.2550659179688, |
|
"logps/rejected": -582.0850219726562, |
|
"loss": 0.3394, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.16017559170722961, |
|
"rewards/margins": 0.08082611858844757, |
|
"rewards/rejected": -0.241001695394516, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.603133832077953e-06, |
|
"logits/chosen": 0.07086379081010818, |
|
"logits/rejected": 0.13402250409126282, |
|
"logps/chosen": -478.28155517578125, |
|
"logps/rejected": -536.5116577148438, |
|
"loss": 0.3097, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1429453045129776, |
|
"rewards/margins": 0.08558131754398346, |
|
"rewards/rejected": -0.22852663695812225, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.482317534878901e-06, |
|
"logits/chosen": 0.04285923391580582, |
|
"logits/rejected": 0.1404789388179779, |
|
"logps/chosen": -488.8091735839844, |
|
"logps/rejected": -566.8741455078125, |
|
"loss": 0.304, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.165468230843544, |
|
"rewards/margins": 0.09716635942459106, |
|
"rewards/rejected": -0.2626345753669739, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3475222930516484e-06, |
|
"logits/chosen": 0.0852217823266983, |
|
"logits/rejected": 0.12218357622623444, |
|
"logps/chosen": -495.8277282714844, |
|
"logps/rejected": -630.5374145507812, |
|
"loss": 0.305, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.18193328380584717, |
|
"rewards/margins": 0.1324920505285263, |
|
"rewards/rejected": -0.3144252896308899, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.199698658255298e-06, |
|
"logits/chosen": 0.10588987171649933, |
|
"logits/rejected": 0.13151074945926666, |
|
"logps/chosen": -534.0150146484375, |
|
"logps/rejected": -573.1961059570312, |
|
"loss": 0.3174, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1705319881439209, |
|
"rewards/margins": 0.08429961651563644, |
|
"rewards/rejected": -0.25483161211013794, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039889056019159e-06, |
|
"logits/chosen": 0.14604777097702026, |
|
"logits/rejected": 0.17551526427268982, |
|
"logps/chosen": -508.1181640625, |
|
"logps/rejected": -601.8496704101562, |
|
"loss": 0.2995, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15790864825248718, |
|
"rewards/margins": 0.10467412322759628, |
|
"rewards/rejected": -0.2625827491283417, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869220434746509e-06, |
|
"logits/chosen": 0.11204711347818375, |
|
"logits/rejected": 0.12582647800445557, |
|
"logps/chosen": -511.799072265625, |
|
"logps/rejected": -630.4769287109375, |
|
"loss": 0.3083, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.17171703279018402, |
|
"rewards/margins": 0.12071826308965683, |
|
"rewards/rejected": -0.29243525862693787, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.688896318678322e-06, |
|
"logits/chosen": 0.08149586617946625, |
|
"logits/rejected": 0.1699526309967041, |
|
"logps/chosen": -502.21331787109375, |
|
"logps/rejected": -597.2528076171875, |
|
"loss": 0.3003, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.1475558578968048, |
|
"rewards/margins": 0.137506365776062, |
|
"rewards/rejected": -0.28506219387054443, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5001883208580668e-06, |
|
"logits/chosen": 0.06874342262744904, |
|
"logits/rejected": 0.17765206098556519, |
|
"logps/chosen": -517.6918334960938, |
|
"logps/rejected": -670.2382202148438, |
|
"loss": 0.2989, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.18062789738178253, |
|
"rewards/margins": 0.14384225010871887, |
|
"rewards/rejected": -0.3244701325893402, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.30442717594657e-06, |
|
"logits/chosen": 0.08455907553434372, |
|
"logits/rejected": 0.16155509650707245, |
|
"logps/chosen": -573.642333984375, |
|
"logps/rejected": -693.3612060546875, |
|
"loss": 0.2908, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.21603624522686005, |
|
"rewards/margins": 0.11141183227300644, |
|
"rewards/rejected": -0.3274480700492859, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.102993356121938e-06, |
|
"logits/chosen": 0.13803192973136902, |
|
"logits/rejected": 0.13260796666145325, |
|
"logps/chosen": -504.03070068359375, |
|
"logps/rejected": -662.9249267578125, |
|
"loss": 0.2837, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.20684099197387695, |
|
"rewards/margins": 0.132938414812088, |
|
"rewards/rejected": -0.33977940678596497, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8973073362395e-06, |
|
"logits/chosen": 0.1248478889465332, |
|
"logits/rejected": 0.18150393664836884, |
|
"logps/chosen": -556.743896484375, |
|
"logps/rejected": -682.061279296875, |
|
"loss": 0.3043, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1968514621257782, |
|
"rewards/margins": 0.12893599271774292, |
|
"rewards/rejected": -0.3257874846458435, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6888195769001147e-06, |
|
"logits/chosen": 0.12090057134628296, |
|
"logits/rejected": 0.16103163361549377, |
|
"logps/chosen": -576.2421875, |
|
"logps/rejected": -636.4088134765625, |
|
"loss": 0.2942, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.17859044671058655, |
|
"rewards/margins": 0.11207801103591919, |
|
"rewards/rejected": -0.29066842794418335, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.479000296064417e-06, |
|
"logits/chosen": 0.11349457502365112, |
|
"logits/rejected": 0.17926748096942902, |
|
"logps/chosen": -535.904052734375, |
|
"logps/rejected": -646.5347900390625, |
|
"loss": 0.3018, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18949127197265625, |
|
"rewards/margins": 0.14192432165145874, |
|
"rewards/rejected": -0.3314156234264374, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.269329101341745e-06, |
|
"logits/chosen": 0.08516987413167953, |
|
"logits/rejected": 0.14970402419567108, |
|
"logps/chosen": -517.2757568359375, |
|
"logps/rejected": -607.4237060546875, |
|
"loss": 0.2892, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.17799392342567444, |
|
"rewards/margins": 0.11533834785223007, |
|
"rewards/rejected": -0.2933322489261627, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.06128455606496e-06, |
|
"logits/chosen": 0.08016245067119598, |
|
"logits/rejected": 0.16319520771503448, |
|
"logps/chosen": -497.9122619628906, |
|
"logps/rejected": -589.7242431640625, |
|
"loss": 0.3044, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.19338397681713104, |
|
"rewards/margins": 0.09803850203752518, |
|
"rewards/rejected": -0.2914224863052368, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.856333752729311e-06, |
|
"logits/chosen": 0.15101440250873566, |
|
"logits/rejected": 0.171871617436409, |
|
"logps/chosen": -571.2918090820312, |
|
"logps/rejected": -696.5802001953125, |
|
"loss": 0.3184, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.20878608524799347, |
|
"rewards/margins": 0.13470368087291718, |
|
"rewards/rejected": -0.34348976612091064, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6559219673215784e-06, |
|
"logits/chosen": 0.13930802047252655, |
|
"logits/rejected": 0.20976956188678741, |
|
"logps/chosen": -531.32177734375, |
|
"logps/rejected": -613.5001220703125, |
|
"loss": 0.2899, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.17231304943561554, |
|
"rewards/margins": 0.12125241756439209, |
|
"rewards/rejected": -0.29356545209884644, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4614624674952843e-06, |
|
"logits/chosen": 0.11396761983633041, |
|
"logits/rejected": 0.2314365655183792, |
|
"logps/chosen": -552.0916748046875, |
|
"logps/rejected": -652.0897827148438, |
|
"loss": 0.2926, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.18660280108451843, |
|
"rewards/margins": 0.11572613567113876, |
|
"rewards/rejected": -0.3023289442062378, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2743265464628787e-06, |
|
"logits/chosen": 0.06689377129077911, |
|
"logits/rejected": 0.1653607338666916, |
|
"logps/chosen": -559.4171142578125, |
|
"logps/rejected": -663.8862915039062, |
|
"loss": 0.2877, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.19495370984077454, |
|
"rewards/margins": 0.13509666919708252, |
|
"rewards/rejected": -0.33005034923553467, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0958338528840893e-06, |
|
"logits/chosen": 0.0707981139421463, |
|
"logits/rejected": 0.19608157873153687, |
|
"logps/chosen": -585.0418090820312, |
|
"logps/rejected": -708.06689453125, |
|
"loss": 0.2956, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.21173438429832458, |
|
"rewards/margins": 0.14347293972969055, |
|
"rewards/rejected": -0.3552073538303375, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.272430849423175e-07, |
|
"logits/chosen": 0.12051234394311905, |
|
"logits/rejected": 0.22985529899597168, |
|
"logps/chosen": -581.6112670898438, |
|
"logps/rejected": -700.2015380859375, |
|
"loss": 0.2826, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1966112107038498, |
|
"rewards/margins": 0.1535426378250122, |
|
"rewards/rejected": -0.3501538634300232, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.697431142327633e-07, |
|
"logits/chosen": 0.14221158623695374, |
|
"logits/rejected": 0.16759946942329407, |
|
"logps/chosen": -543.4645385742188, |
|
"logps/rejected": -653.2728271484375, |
|
"loss": 0.2983, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.19766229391098022, |
|
"rewards/margins": 0.13313183188438416, |
|
"rewards/rejected": -0.33079415559768677, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.244446020550182e-07, |
|
"logits/chosen": 0.1523372381925583, |
|
"logits/rejected": 0.17464013397693634, |
|
"logps/chosen": -559.0565185546875, |
|
"logps/rejected": -647.8815307617188, |
|
"loss": 0.2835, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.20685505867004395, |
|
"rewards/margins": 0.11294861137866974, |
|
"rewards/rejected": -0.3198036551475525, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.923721672305148e-07, |
|
"logits/chosen": 0.07727678120136261, |
|
"logits/rejected": 0.14975441992282867, |
|
"logps/chosen": -530.6942138671875, |
|
"logps/rejected": -737.3821411132812, |
|
"loss": 0.2821, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.1940075010061264, |
|
"rewards/margins": 0.18392714858055115, |
|
"rewards/rejected": -0.37793463468551636, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7445716067596506e-07, |
|
"logits/chosen": 0.06651227176189423, |
|
"logits/rejected": 0.1357874870300293, |
|
"logps/chosen": -568.581298828125, |
|
"logps/rejected": -677.1904907226562, |
|
"loss": 0.2696, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.22338159382343292, |
|
"rewards/margins": 0.14005926251411438, |
|
"rewards/rejected": -0.3634408414363861, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7153109768518926e-07, |
|
"logits/chosen": 0.12754273414611816, |
|
"logits/rejected": 0.18894067406654358, |
|
"logps/chosen": -551.6950073242188, |
|
"logps/rejected": -662.9625244140625, |
|
"loss": 0.2798, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.20485155284404755, |
|
"rewards/margins": 0.13481572270393372, |
|
"rewards/rejected": -0.33966726064682007, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8431979423369607e-07, |
|
"logits/chosen": 0.1290164738893509, |
|
"logits/rejected": 0.2077597826719284, |
|
"logps/chosen": -539.3775634765625, |
|
"logps/rejected": -648.92138671875, |
|
"loss": 0.3018, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.22309288382530212, |
|
"rewards/margins": 0.11153991520404816, |
|
"rewards/rejected": -0.3346328139305115, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1343824865573422e-07, |
|
"logits/chosen": 0.12593218684196472, |
|
"logits/rejected": 0.1743309050798416, |
|
"logps/chosen": -534.8792114257812, |
|
"logps/rejected": -619.5510864257812, |
|
"loss": 0.3046, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2254379242658615, |
|
"rewards/margins": 0.10715726763010025, |
|
"rewards/rejected": -0.33259516954421997, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.9386304787299175e-08, |
|
"logits/chosen": 0.15326878428459167, |
|
"logits/rejected": 0.19667108356952667, |
|
"logps/chosen": -596.8463745117188, |
|
"logps/rejected": -687.6807861328125, |
|
"loss": 0.2838, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2249801903963089, |
|
"rewards/margins": 0.12209514528512955, |
|
"rewards/rejected": -0.34707534313201904, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2545127157831416e-08, |
|
"logits/chosen": 0.08709342032670975, |
|
"logits/rejected": 0.12705549597740173, |
|
"logps/chosen": -551.4010620117188, |
|
"logps/rejected": -672.7706298828125, |
|
"loss": 0.2969, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2128075808286667, |
|
"rewards/margins": 0.12216651439666748, |
|
"rewards/rejected": -0.33497413992881775, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.1745130869123564e-09, |
|
"logits/chosen": 0.07737437635660172, |
|
"logits/rejected": 0.13851606845855713, |
|
"logps/chosen": -584.75, |
|
"logps/rejected": -722.1080322265625, |
|
"loss": 0.2947, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22851315140724182, |
|
"rewards/margins": 0.13729806244373322, |
|
"rewards/rejected": -0.36581122875213623, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 416, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3107354554276054, |
|
"train_runtime": 5919.6286, |
|
"train_samples_per_second": 3.379, |
|
"train_steps_per_second": 0.07 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 416, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|