|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.996400719856029, |
|
"eval_steps": 500, |
|
"global_step": 832, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.952380952380953e-08, |
|
"logits/chosen": 0.11703574657440186, |
|
"logits/rejected": 0.3661181330680847, |
|
"logps/chosen": -218.64993286132812, |
|
"logps/rejected": -191.34808349609375, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.952380952380953e-07, |
|
"logits/chosen": 0.10404814779758453, |
|
"logits/rejected": 0.23778128623962402, |
|
"logps/chosen": -401.4896240234375, |
|
"logps/rejected": -345.9862976074219, |
|
"loss": 0.3642, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": 0.0004916194593533874, |
|
"rewards/margins": 0.0005594216636382043, |
|
"rewards/rejected": -6.780229159630835e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.1904761904761906e-06, |
|
"logits/chosen": 0.13218173384666443, |
|
"logits/rejected": 0.20688870549201965, |
|
"logps/chosen": -336.506591796875, |
|
"logps/rejected": -319.3189392089844, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.00020826223772019148, |
|
"rewards/margins": -0.000311180017888546, |
|
"rewards/rejected": 0.0005194421974010766, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.7857142857142859e-06, |
|
"logits/chosen": 0.11459924280643463, |
|
"logits/rejected": 0.1922653764486313, |
|
"logps/chosen": -342.02569580078125, |
|
"logps/rejected": -324.1275939941406, |
|
"loss": 0.3786, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0006439354037865996, |
|
"rewards/margins": 0.0004738263669423759, |
|
"rewards/rejected": -0.0011177618289366364, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": 0.13577614724636078, |
|
"logits/rejected": 0.17847472429275513, |
|
"logps/chosen": -298.6214294433594, |
|
"logps/rejected": -289.40850830078125, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0008146329782903194, |
|
"rewards/margins": 0.0024004268925637007, |
|
"rewards/rejected": -0.001585794030688703, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9761904761904763e-06, |
|
"logits/chosen": 0.10261678695678711, |
|
"logits/rejected": 0.20306341350078583, |
|
"logps/chosen": -351.93572998046875, |
|
"logps/rejected": -362.153564453125, |
|
"loss": 0.3692, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0009010158246383071, |
|
"rewards/margins": 0.004100508522242308, |
|
"rewards/rejected": -0.003199493046849966, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"logits/chosen": 0.13770776987075806, |
|
"logits/rejected": 0.2188442498445511, |
|
"logps/chosen": -349.51690673828125, |
|
"logps/rejected": -351.1549377441406, |
|
"loss": 0.3655, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.003258631331846118, |
|
"rewards/margins": 0.007584023289382458, |
|
"rewards/rejected": -0.004325392190366983, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": 0.1271902620792389, |
|
"logits/rejected": 0.23070549964904785, |
|
"logps/chosen": -378.33843994140625, |
|
"logps/rejected": -350.60662841796875, |
|
"loss": 0.3586, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.006277147680521011, |
|
"rewards/margins": 0.015207210555672646, |
|
"rewards/rejected": -0.00893006194382906, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": 0.08625562489032745, |
|
"logits/rejected": 0.12316304445266724, |
|
"logps/chosen": -307.9439697265625, |
|
"logps/rejected": -335.3281555175781, |
|
"loss": 0.3489, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.013669237494468689, |
|
"rewards/margins": 0.02226843498647213, |
|
"rewards/rejected": -0.008599198423326015, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9992062457191005e-06, |
|
"logits/chosen": 0.137899249792099, |
|
"logits/rejected": 0.2165641039609909, |
|
"logps/chosen": -355.6449890136719, |
|
"logps/rejected": -338.1387634277344, |
|
"loss": 0.3229, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.013719858601689339, |
|
"rewards/margins": 0.042457275092601776, |
|
"rewards/rejected": -0.028737416490912437, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.994357350311441e-06, |
|
"logits/chosen": 0.14011432230472565, |
|
"logits/rejected": 0.21795734763145447, |
|
"logps/chosen": -360.2173156738281, |
|
"logps/rejected": -358.1722717285156, |
|
"loss": 0.3043, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.001885895850136876, |
|
"rewards/margins": 0.06035756319761276, |
|
"rewards/rejected": -0.06224345415830612, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.98510907587894e-06, |
|
"logits/chosen": 0.13077042996883392, |
|
"logits/rejected": 0.21840377151966095, |
|
"logps/chosen": -356.6605224609375, |
|
"logps/rejected": -348.19476318359375, |
|
"loss": 0.3169, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.008259604685008526, |
|
"rewards/margins": 0.08336080610752106, |
|
"rewards/rejected": -0.09162042289972305, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.97147773390341e-06, |
|
"logits/chosen": 0.14791826903820038, |
|
"logits/rejected": 0.1786331683397293, |
|
"logps/chosen": -320.29608154296875, |
|
"logps/rejected": -337.16864013671875, |
|
"loss": 0.2861, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.008191597647964954, |
|
"rewards/margins": 0.09656454622745514, |
|
"rewards/rejected": -0.08837294578552246, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.953487366425163e-06, |
|
"logits/chosen": 0.12249626964330673, |
|
"logits/rejected": 0.16907112300395966, |
|
"logps/chosen": -342.0648498535156, |
|
"logps/rejected": -363.51031494140625, |
|
"loss": 0.3175, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0017940097022801638, |
|
"rewards/margins": 0.07947574555873871, |
|
"rewards/rejected": -0.07768173515796661, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.931169703639282e-06, |
|
"logits/chosen": 0.0919104740023613, |
|
"logits/rejected": 0.18652714788913727, |
|
"logps/chosen": -337.65374755859375, |
|
"logps/rejected": -364.11199951171875, |
|
"loss": 0.2828, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.03137553483247757, |
|
"rewards/margins": 0.12489553540945053, |
|
"rewards/rejected": -0.09352000057697296, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.904564107932048e-06, |
|
"logits/chosen": 0.13001379370689392, |
|
"logits/rejected": 0.20237913727760315, |
|
"logps/chosen": -351.857421875, |
|
"logps/rejected": -336.6232604980469, |
|
"loss": 0.2899, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0018621661001816392, |
|
"rewards/margins": 0.10416339337825775, |
|
"rewards/rejected": -0.10602555423974991, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.873717504456219e-06, |
|
"logits/chosen": 0.06932858377695084, |
|
"logits/rejected": 0.15127311646938324, |
|
"logps/chosen": -345.0473937988281, |
|
"logps/rejected": -363.4601745605469, |
|
"loss": 0.2889, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.01222093403339386, |
|
"rewards/margins": 0.11404307186603546, |
|
"rewards/rejected": -0.12626400589942932, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.838684298367616e-06, |
|
"logits/chosen": 0.16357803344726562, |
|
"logits/rejected": 0.23174886405467987, |
|
"logps/chosen": -357.15289306640625, |
|
"logps/rejected": -358.61065673828125, |
|
"loss": 0.2884, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.017432499676942825, |
|
"rewards/margins": 0.11707814782857895, |
|
"rewards/rejected": -0.09964564442634583, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.7995262788689865e-06, |
|
"logits/chosen": 0.16258656978607178, |
|
"logits/rejected": 0.2536885738372803, |
|
"logps/chosen": -337.7535705566406, |
|
"logps/rejected": -346.13470458984375, |
|
"loss": 0.2789, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.02853301540017128, |
|
"rewards/margins": 0.1016291156411171, |
|
"rewards/rejected": -0.07309609651565552, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.756312510230377e-06, |
|
"logits/chosen": 0.14243337512016296, |
|
"logits/rejected": 0.24410876631736755, |
|
"logps/chosen": -376.64599609375, |
|
"logps/rejected": -363.4615478515625, |
|
"loss": 0.2828, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.03516390174627304, |
|
"rewards/margins": 0.12421919405460358, |
|
"rewards/rejected": -0.08905528485774994, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.709119209978242e-06, |
|
"logits/chosen": 0.17320121824741364, |
|
"logits/rejected": 0.2264091521501541, |
|
"logps/chosen": -362.0121765136719, |
|
"logps/rejected": -352.7041931152344, |
|
"loss": 0.283, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.039128489792346954, |
|
"rewards/margins": 0.11708054691553116, |
|
"rewards/rejected": -0.07795204222202301, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6580296144681155e-06, |
|
"logits/chosen": 0.1604190617799759, |
|
"logits/rejected": 0.17792078852653503, |
|
"logps/chosen": -315.1614074707031, |
|
"logps/rejected": -340.53619384765625, |
|
"loss": 0.2754, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.05531097203493118, |
|
"rewards/margins": 0.15012916922569275, |
|
"rewards/rejected": -0.09481821954250336, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.603133832077953e-06, |
|
"logits/chosen": 0.11915634572505951, |
|
"logits/rejected": 0.15653367340564728, |
|
"logps/chosen": -351.16986083984375, |
|
"logps/rejected": -354.53607177734375, |
|
"loss": 0.2738, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.06388933956623077, |
|
"rewards/margins": 0.1507207453250885, |
|
"rewards/rejected": -0.08683140575885773, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.544528684281056e-06, |
|
"logits/chosen": 0.09443524479866028, |
|
"logits/rejected": 0.1415812075138092, |
|
"logps/chosen": -355.2025451660156, |
|
"logps/rejected": -349.1300354003906, |
|
"loss": 0.276, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.021877283230423927, |
|
"rewards/margins": 0.1259470283985138, |
|
"rewards/rejected": -0.10406973212957382, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.482317534878901e-06, |
|
"logits/chosen": 0.08314280211925507, |
|
"logits/rejected": 0.11439633369445801, |
|
"logps/chosen": -333.59295654296875, |
|
"logps/rejected": -341.5171203613281, |
|
"loss": 0.2668, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.016557829454541206, |
|
"rewards/margins": 0.11629464477300644, |
|
"rewards/rejected": -0.09973680973052979, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.416610107695043e-06, |
|
"logits/chosen": 0.11690554767847061, |
|
"logits/rejected": 0.06475332379341125, |
|
"logps/chosen": -331.7200012207031, |
|
"logps/rejected": -341.45245361328125, |
|
"loss": 0.2819, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.025893816724419594, |
|
"rewards/margins": 0.13103850185871124, |
|
"rewards/rejected": -0.15693232417106628, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.3475222930516484e-06, |
|
"logits/chosen": 0.08940346539020538, |
|
"logits/rejected": 0.12766343355178833, |
|
"logps/chosen": -333.33343505859375, |
|
"logps/rejected": -372.55755615234375, |
|
"loss": 0.2833, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.026656050235033035, |
|
"rewards/margins": 0.16600963473320007, |
|
"rewards/rejected": -0.19266566634178162, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.2751759433699745e-06, |
|
"logits/chosen": 0.04847298935055733, |
|
"logits/rejected": 0.11083607375621796, |
|
"logps/chosen": -342.9352722167969, |
|
"logps/rejected": -357.6617736816406, |
|
"loss": 0.274, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.021627375856041908, |
|
"rewards/margins": 0.12919363379478455, |
|
"rewards/rejected": -0.1508210003376007, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.199698658255298e-06, |
|
"logits/chosen": 0.056878913193941116, |
|
"logits/rejected": 0.14858202636241913, |
|
"logps/chosen": -370.22637939453125, |
|
"logps/rejected": -398.57159423828125, |
|
"loss": 0.2715, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.021515587344765663, |
|
"rewards/margins": 0.1492767035961151, |
|
"rewards/rejected": -0.17079228162765503, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.121223559445343e-06, |
|
"logits/chosen": 0.03415738046169281, |
|
"logits/rejected": 0.12577436864376068, |
|
"logps/chosen": -352.68072509765625, |
|
"logps/rejected": -383.16204833984375, |
|
"loss": 0.264, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.03958406671881676, |
|
"rewards/margins": 0.1690487265586853, |
|
"rewards/rejected": -0.20863279700279236, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.039889056019159e-06, |
|
"logits/chosen": 0.02515377476811409, |
|
"logits/rejected": 0.10390216112136841, |
|
"logps/chosen": -353.2736511230469, |
|
"logps/rejected": -353.888671875, |
|
"loss": 0.2461, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.031048249453306198, |
|
"rewards/margins": 0.1348181664943695, |
|
"rewards/rejected": -0.1658664047718048, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.955838600280535e-06, |
|
"logits/chosen": 0.025213222950696945, |
|
"logits/rejected": 0.1410323679447174, |
|
"logps/chosen": -387.21856689453125, |
|
"logps/rejected": -373.70355224609375, |
|
"loss": 0.2703, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.010617800056934357, |
|
"rewards/margins": 0.19538867473602295, |
|
"rewards/rejected": -0.184770867228508, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.869220434746509e-06, |
|
"logits/chosen": 0.06151404231786728, |
|
"logits/rejected": 0.1290605366230011, |
|
"logps/chosen": -345.41571044921875, |
|
"logps/rejected": -370.25592041015625, |
|
"loss": 0.2703, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.019938651472330093, |
|
"rewards/margins": 0.16865777969360352, |
|
"rewards/rejected": -0.1885964572429657, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.7801873306872315e-06, |
|
"logits/chosen": 0.06525089591741562, |
|
"logits/rejected": 0.12144273519515991, |
|
"logps/chosen": -340.03277587890625, |
|
"logps/rejected": -371.6439514160156, |
|
"loss": 0.2577, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.02320241369307041, |
|
"rewards/margins": 0.17125853896141052, |
|
"rewards/rejected": -0.14805614948272705, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.688896318678322e-06, |
|
"logits/chosen": 0.055392809212207794, |
|
"logits/rejected": 0.12697988748550415, |
|
"logps/chosen": -349.14556884765625, |
|
"logps/rejected": -333.9625549316406, |
|
"loss": 0.2748, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.016882654279470444, |
|
"rewards/margins": 0.16859912872314453, |
|
"rewards/rejected": -0.1517164707183838, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5955084116409382e-06, |
|
"logits/chosen": 0.08919240534305573, |
|
"logits/rejected": 0.1610582321882248, |
|
"logps/chosen": -367.30621337890625, |
|
"logps/rejected": -346.13873291015625, |
|
"loss": 0.2664, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.04106982424855232, |
|
"rewards/margins": 0.14082172513008118, |
|
"rewards/rejected": -0.1818915605545044, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5001883208580668e-06, |
|
"logits/chosen": 0.056862883269786835, |
|
"logits/rejected": 0.14601710438728333, |
|
"logps/chosen": -383.3697204589844, |
|
"logps/rejected": -388.45147705078125, |
|
"loss": 0.2359, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.005547699984163046, |
|
"rewards/margins": 0.20355132222175598, |
|
"rewards/rejected": -0.20909900963306427, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.403104165467883e-06, |
|
"logits/chosen": 0.047759585082530975, |
|
"logits/rejected": 0.1289873570203781, |
|
"logps/chosen": -363.989990234375, |
|
"logps/rejected": -361.4288330078125, |
|
"loss": 0.2491, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.030249441042542458, |
|
"rewards/margins": 0.1802445650100708, |
|
"rewards/rejected": -0.2104939967393875, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.30442717594657e-06, |
|
"logits/chosen": 0.06461011618375778, |
|
"logits/rejected": 0.14733566343784332, |
|
"logps/chosen": -350.331298828125, |
|
"logps/rejected": -334.6890563964844, |
|
"loss": 0.2754, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.04477550461888313, |
|
"rewards/margins": 0.12478353828191757, |
|
"rewards/rejected": -0.1695590317249298, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2043313921035747e-06, |
|
"logits/chosen": 0.07650026679039001, |
|
"logits/rejected": 0.10351625829935074, |
|
"logps/chosen": -319.55328369140625, |
|
"logps/rejected": -328.97625732421875, |
|
"loss": 0.2601, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.017551960423588753, |
|
"rewards/margins": 0.1492632031440735, |
|
"rewards/rejected": -0.1668151617050171, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.102993356121938e-06, |
|
"logits/chosen": 0.045068711042404175, |
|
"logits/rejected": 0.133053719997406, |
|
"logps/chosen": -376.1606750488281, |
|
"logps/rejected": -360.3962097167969, |
|
"loss": 0.2547, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.012314733117818832, |
|
"rewards/margins": 0.18502004444599152, |
|
"rewards/rejected": -0.19733479619026184, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.0005918011851245e-06, |
|
"logits/chosen": 0.03985997289419174, |
|
"logits/rejected": 0.1656588464975357, |
|
"logps/chosen": -379.48199462890625, |
|
"logps/rejected": -362.08380126953125, |
|
"loss": 0.273, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.00483871391043067, |
|
"rewards/margins": 0.1729108989238739, |
|
"rewards/rejected": -0.16807220876216888, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.8973073362395e-06, |
|
"logits/chosen": 0.06932957470417023, |
|
"logits/rejected": 0.11695323139429092, |
|
"logps/chosen": -350.8485107421875, |
|
"logps/rejected": -359.5559387207031, |
|
"loss": 0.2562, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.015226135030388832, |
|
"rewards/margins": 0.13259340822696686, |
|
"rewards/rejected": -0.14781954884529114, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.7933221274484725e-06, |
|
"logits/chosen": 0.022776301950216293, |
|
"logits/rejected": 0.1463911086320877, |
|
"logps/chosen": -344.72900390625, |
|
"logps/rejected": -374.57110595703125, |
|
"loss": 0.2546, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.01165957935154438, |
|
"rewards/margins": 0.17338308691978455, |
|
"rewards/rejected": -0.1617235392332077, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.6888195769001147e-06, |
|
"logits/chosen": 0.011232647113502026, |
|
"logits/rejected": 0.08440439403057098, |
|
"logps/chosen": -315.56158447265625, |
|
"logps/rejected": -370.6732177734375, |
|
"loss": 0.2635, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0027101226150989532, |
|
"rewards/margins": 0.18474070727825165, |
|
"rewards/rejected": -0.1874508261680603, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.583983999134951e-06, |
|
"logits/chosen": 0.033940933644771576, |
|
"logits/rejected": 0.12383987754583359, |
|
"logps/chosen": -353.528076171875, |
|
"logps/rejected": -358.25433349609375, |
|
"loss": 0.2647, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.01790205016732216, |
|
"rewards/margins": 0.16596254706382751, |
|
"rewards/rejected": -0.18386459350585938, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.479000296064417e-06, |
|
"logits/chosen": 0.03699932247400284, |
|
"logits/rejected": 0.13089559972286224, |
|
"logps/chosen": -375.724609375, |
|
"logps/rejected": -400.3955383300781, |
|
"loss": 0.2481, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.041518934071063995, |
|
"rewards/margins": 0.1739250123500824, |
|
"rewards/rejected": -0.21544396877288818, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.374053630853358e-06, |
|
"logits/chosen": 0.07867871224880219, |
|
"logits/rejected": 0.0793570876121521, |
|
"logps/chosen": -392.0462646484375, |
|
"logps/rejected": -398.4570617675781, |
|
"loss": 0.2589, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.03614164516329765, |
|
"rewards/margins": 0.18701379001140594, |
|
"rewards/rejected": -0.2231554538011551, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.269329101341745e-06, |
|
"logits/chosen": 0.04767027124762535, |
|
"logits/rejected": 0.10338594764471054, |
|
"logps/chosen": -311.9175109863281, |
|
"logps/rejected": -353.84375, |
|
"loss": 0.253, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.022265803068876266, |
|
"rewards/margins": 0.21186105906963348, |
|
"rewards/rejected": -0.18959525227546692, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.1650114135816052e-06, |
|
"logits/chosen": 0.04343586042523384, |
|
"logits/rejected": 0.14493630826473236, |
|
"logps/chosen": -368.74066162109375, |
|
"logps/rejected": -401.21746826171875, |
|
"loss": 0.254, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0024279176723212004, |
|
"rewards/margins": 0.1874578297138214, |
|
"rewards/rejected": -0.18988573551177979, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.06128455606496e-06, |
|
"logits/chosen": 0.04143913835287094, |
|
"logits/rejected": 0.06632859259843826, |
|
"logps/chosen": -320.82281494140625, |
|
"logps/rejected": -348.89923095703125, |
|
"loss": 0.2438, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0026562472339719534, |
|
"rewards/margins": 0.18748678267002106, |
|
"rewards/rejected": -0.19014303386211395, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.958331475217357e-06, |
|
"logits/chosen": 0.03532598540186882, |
|
"logits/rejected": 0.07111676037311554, |
|
"logps/chosen": -345.3083801269531, |
|
"logps/rejected": -391.5373840332031, |
|
"loss": 0.2428, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.011091398075222969, |
|
"rewards/margins": 0.18126052618026733, |
|
"rewards/rejected": -0.19235190749168396, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.856333752729311e-06, |
|
"logits/chosen": 0.06463773548603058, |
|
"logits/rejected": 0.07833746820688248, |
|
"logps/chosen": -303.89508056640625, |
|
"logps/rejected": -328.54095458984375, |
|
"loss": 0.2549, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.030626490712165833, |
|
"rewards/margins": 0.14131976664066315, |
|
"rewards/rejected": -0.17194625735282898, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.7554712852947915e-06, |
|
"logits/chosen": 0.017867419868707657, |
|
"logits/rejected": 0.13077208399772644, |
|
"logps/chosen": -354.83990478515625, |
|
"logps/rejected": -369.40447998046875, |
|
"loss": 0.2688, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.022668231278657913, |
|
"rewards/margins": 0.164995938539505, |
|
"rewards/rejected": -0.1876641809940338, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.6559219673215784e-06, |
|
"logits/chosen": 0.07014649361371994, |
|
"logits/rejected": 0.11957643926143646, |
|
"logps/chosen": -341.1030578613281, |
|
"logps/rejected": -360.0315246582031, |
|
"loss": 0.2559, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0031127408146858215, |
|
"rewards/margins": 0.17289015650749207, |
|
"rewards/rejected": -0.16977740824222565, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.5578613771731214e-06, |
|
"logits/chosen": 0.044239241629838943, |
|
"logits/rejected": 0.11994221061468124, |
|
"logps/chosen": -347.32757568359375, |
|
"logps/rejected": -388.6127624511719, |
|
"loss": 0.244, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.0042419894598424435, |
|
"rewards/margins": 0.21681733429431915, |
|
"rewards/rejected": -0.22105932235717773, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.4614624674952843e-06, |
|
"logits/chosen": 0.07131338119506836, |
|
"logits/rejected": 0.14118310809135437, |
|
"logps/chosen": -381.21112060546875, |
|
"logps/rejected": -375.3702087402344, |
|
"loss": 0.2594, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.01365007646381855, |
|
"rewards/margins": 0.16313722729682922, |
|
"rewards/rejected": -0.17678730189800262, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.3668952601741442e-06, |
|
"logits/chosen": 0.019948173314332962, |
|
"logits/rejected": 0.14301837980747223, |
|
"logps/chosen": -359.31829833984375, |
|
"logps/rejected": -386.3388366699219, |
|
"loss": 0.2421, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.003145938040688634, |
|
"rewards/margins": 0.17547301948070526, |
|
"rewards/rejected": -0.17861898243427277, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.2743265464628787e-06, |
|
"logits/chosen": 0.04147445410490036, |
|
"logits/rejected": 0.07641445100307465, |
|
"logps/chosen": -358.9191589355469, |
|
"logps/rejected": -354.82989501953125, |
|
"loss": 0.2574, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.03237663954496384, |
|
"rewards/margins": 0.14051951467990875, |
|
"rewards/rejected": -0.17289616167545319, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1839195928066101e-06, |
|
"logits/chosen": 0.010291008278727531, |
|
"logits/rejected": 0.08601720631122589, |
|
"logps/chosen": -338.0829162597656, |
|
"logps/rejected": -349.2616882324219, |
|
"loss": 0.2504, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.012054244987666607, |
|
"rewards/margins": 0.18035855889320374, |
|
"rewards/rejected": -0.19241279363632202, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0958338528840893e-06, |
|
"logits/chosen": 0.07830692082643509, |
|
"logits/rejected": 0.1112513542175293, |
|
"logps/chosen": -318.32928466796875, |
|
"logps/rejected": -351.01531982421875, |
|
"loss": 0.2642, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.012662211433053017, |
|
"rewards/margins": 0.15172497928142548, |
|
"rewards/rejected": -0.16438719630241394, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0102246863740498e-06, |
|
"logits/chosen": 0.013798505067825317, |
|
"logits/rejected": 0.13072696328163147, |
|
"logps/chosen": -326.76336669921875, |
|
"logps/rejected": -380.63458251953125, |
|
"loss": 0.2398, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0045418571680784225, |
|
"rewards/margins": 0.19731177389621735, |
|
"rewards/rejected": -0.20185360312461853, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.272430849423175e-07, |
|
"logits/chosen": 0.041550200432538986, |
|
"logits/rejected": 0.12003109604120255, |
|
"logps/chosen": -350.9006652832031, |
|
"logps/rejected": -404.7802734375, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.015362398698925972, |
|
"rewards/margins": 0.22952251136302948, |
|
"rewards/rejected": -0.21416012942790985, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.470354059328919e-07, |
|
"logits/chosen": 0.104413703083992, |
|
"logits/rejected": 0.11118074506521225, |
|
"logps/chosen": -336.5838928222656, |
|
"logps/rejected": -373.56085205078125, |
|
"loss": 0.2452, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.010279458947479725, |
|
"rewards/margins": 0.2295042723417282, |
|
"rewards/rejected": -0.21922484040260315, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.697431142327633e-07, |
|
"logits/chosen": 0.07976067811250687, |
|
"logits/rejected": 0.12730778753757477, |
|
"logps/chosen": -348.73443603515625, |
|
"logps/rejected": -358.34088134765625, |
|
"loss": 0.2338, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.030282145366072655, |
|
"rewards/margins": 0.16269627213478088, |
|
"rewards/rejected": -0.1929783970117569, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 6.955025327656839e-07, |
|
"logits/chosen": 0.04196876287460327, |
|
"logits/rejected": 0.11756552755832672, |
|
"logps/chosen": -327.8496398925781, |
|
"logps/rejected": -355.4369201660156, |
|
"loss": 0.2558, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.0020761913619935513, |
|
"rewards/margins": 0.17507974803447723, |
|
"rewards/rejected": -0.17300358414649963, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 6.244446020550182e-07, |
|
"logits/chosen": 0.05316174030303955, |
|
"logits/rejected": 0.10895484685897827, |
|
"logps/chosen": -354.5049133300781, |
|
"logps/rejected": -411.59765625, |
|
"loss": 0.2319, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.0010157767683267593, |
|
"rewards/margins": 0.21365651488304138, |
|
"rewards/rejected": -0.2146722972393036, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.566946492796766e-07, |
|
"logits/chosen": 0.07230822741985321, |
|
"logits/rejected": 0.09754084050655365, |
|
"logps/chosen": -368.22802734375, |
|
"logps/rejected": -368.54974365234375, |
|
"loss": 0.2451, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.02271811105310917, |
|
"rewards/margins": 0.14353466033935547, |
|
"rewards/rejected": -0.16625277698040009, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.923721672305148e-07, |
|
"logits/chosen": 0.04747115820646286, |
|
"logits/rejected": 0.10951533168554306, |
|
"logps/chosen": -373.25653076171875, |
|
"logps/rejected": -403.66619873046875, |
|
"loss": 0.262, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.2638385164318606e-05, |
|
"rewards/margins": 0.20511355996131897, |
|
"rewards/rejected": -0.2051461637020111, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.3159060355700943e-07, |
|
"logits/chosen": 0.007146243005990982, |
|
"logits/rejected": 0.15595687925815582, |
|
"logps/chosen": -360.5429382324219, |
|
"logps/rejected": -360.84271240234375, |
|
"loss": 0.2528, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.026043469086289406, |
|
"rewards/margins": 0.19069012999534607, |
|
"rewards/rejected": -0.21673360466957092, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.7445716067596506e-07, |
|
"logits/chosen": -0.016133427619934082, |
|
"logits/rejected": 0.06616418063640594, |
|
"logps/chosen": -315.7747497558594, |
|
"logps/rejected": -344.2303771972656, |
|
"loss": 0.242, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.012198897078633308, |
|
"rewards/margins": 0.2178380936384201, |
|
"rewards/rejected": -0.20563916862010956, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.2107260669512334e-07, |
|
"logits/chosen": 0.06611919403076172, |
|
"logits/rejected": 0.08203423768281937, |
|
"logps/chosen": -342.01263427734375, |
|
"logps/rejected": -353.5125427246094, |
|
"loss": 0.2461, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.01212338637560606, |
|
"rewards/margins": 0.17198148369789124, |
|
"rewards/rejected": -0.18410487473011017, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.7153109768518926e-07, |
|
"logits/chosen": 0.05342602729797363, |
|
"logits/rejected": 0.11405602842569351, |
|
"logps/chosen": -393.02593994140625, |
|
"logps/rejected": -416.9335021972656, |
|
"loss": 0.244, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.015018805861473083, |
|
"rewards/margins": 0.2232932150363922, |
|
"rewards/rejected": -0.2383120059967041, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.2592001161370392e-07, |
|
"logits/chosen": 0.059743158519268036, |
|
"logits/rejected": 0.08855228126049042, |
|
"logps/chosen": -365.6115417480469, |
|
"logps/rejected": -373.24310302734375, |
|
"loss": 0.2413, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.007994825020432472, |
|
"rewards/margins": 0.19029465317726135, |
|
"rewards/rejected": -0.19828948378562927, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.8431979423369607e-07, |
|
"logits/chosen": 0.01501550804823637, |
|
"logits/rejected": 0.09877587854862213, |
|
"logps/chosen": -335.7201232910156, |
|
"logps/rejected": -356.1680603027344, |
|
"loss": 0.2601, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.013049180619418621, |
|
"rewards/margins": 0.1567631959915161, |
|
"rewards/rejected": -0.16981235146522522, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.468038171988881e-07, |
|
"logits/chosen": -0.008327131159603596, |
|
"logits/rejected": 0.04639572650194168, |
|
"logps/chosen": -354.1353759765625, |
|
"logps/rejected": -387.98297119140625, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.02448558434844017, |
|
"rewards/margins": 0.1901397705078125, |
|
"rewards/rejected": -0.21462532877922058, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1343824865573422e-07, |
|
"logits/chosen": 0.01856027916073799, |
|
"logits/rejected": 0.07309429347515106, |
|
"logps/chosen": -321.44903564453125, |
|
"logps/rejected": -341.5816955566406, |
|
"loss": 0.2495, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.022448932752013206, |
|
"rewards/margins": 0.17198805510997772, |
|
"rewards/rejected": -0.19443701207637787, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 8.428193654051036e-08, |
|
"logits/chosen": 0.04589134082198143, |
|
"logits/rejected": 0.10319966077804565, |
|
"logps/chosen": -388.9933776855469, |
|
"logps/rejected": -376.8731994628906, |
|
"loss": 0.2475, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.008324380032718182, |
|
"rewards/margins": 0.20527882874011993, |
|
"rewards/rejected": -0.19695445895195007, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5.9386304787299175e-08, |
|
"logits/chosen": 0.03318192437291145, |
|
"logits/rejected": 0.1395682990550995, |
|
"logps/chosen": -377.56622314453125, |
|
"logps/rejected": -377.5900573730469, |
|
"loss": 0.2477, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.0049881902523338795, |
|
"rewards/margins": 0.2095176726579666, |
|
"rewards/rejected": -0.2145058661699295, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.8795262629929e-08, |
|
"logits/chosen": 0.03711915761232376, |
|
"logits/rejected": 0.07861719280481339, |
|
"logps/chosen": -311.10015869140625, |
|
"logps/rejected": -340.22918701171875, |
|
"loss": 0.2288, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.007546453736722469, |
|
"rewards/margins": 0.215033620595932, |
|
"rewards/rejected": -0.20748718082904816, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.2545127157831416e-08, |
|
"logits/chosen": 0.06011080741882324, |
|
"logits/rejected": 0.08075010776519775, |
|
"logps/chosen": -342.993408203125, |
|
"logps/rejected": -338.7896728515625, |
|
"loss": 0.252, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03219890594482422, |
|
"rewards/margins": 0.15845921635627747, |
|
"rewards/rejected": -0.1906580924987793, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.0664559262413831e-08, |
|
"logits/chosen": 0.06324592232704163, |
|
"logits/rejected": 0.15417756140232086, |
|
"logps/chosen": -383.63238525390625, |
|
"logps/rejected": -373.19720458984375, |
|
"loss": 0.2445, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.013102272525429726, |
|
"rewards/margins": 0.21051840484142303, |
|
"rewards/rejected": -0.2236206978559494, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.1745130869123564e-09, |
|
"logits/chosen": 0.02718031406402588, |
|
"logits/rejected": 0.09324290603399277, |
|
"logps/chosen": -342.188232421875, |
|
"logps/rejected": -382.42657470703125, |
|
"loss": 0.2445, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.02895962819457054, |
|
"rewards/margins": 0.1746593415737152, |
|
"rewards/rejected": -0.20361897349357605, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 8.819906889168117e-11, |
|
"logits/chosen": 0.07415173202753067, |
|
"logits/rejected": 0.12375295162200928, |
|
"logps/chosen": -362.17572021484375, |
|
"logps/rejected": -372.21044921875, |
|
"loss": 0.2579, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.023642729967832565, |
|
"rewards/margins": 0.1827639937400818, |
|
"rewards/rejected": -0.20640675723552704, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 832, |
|
"total_flos": 0.0, |
|
"train_loss": 0.27172684411589915, |
|
"train_runtime": 11567.6763, |
|
"train_samples_per_second": 3.458, |
|
"train_steps_per_second": 0.072 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 832, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|