|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.617801047120419e-08, |
|
"logits/chosen": -0.22574472427368164, |
|
"logits/rejected": -0.2384113073348999, |
|
"logps/chosen": -1586.180908203125, |
|
"logps/rejected": -1626.5421142578125, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.617801047120419e-07, |
|
"logits/chosen": -0.1639188826084137, |
|
"logits/rejected": -0.1851254105567932, |
|
"logps/chosen": -2052.12841796875, |
|
"logps/rejected": -1800.1533203125, |
|
"loss": 0.0588, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 6.274010956985876e-05, |
|
"rewards/margins": -1.1924101272597909e-05, |
|
"rewards/rejected": 7.466421811841428e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.235602094240838e-07, |
|
"logits/chosen": -0.21358470618724823, |
|
"logits/rejected": -0.1908903419971466, |
|
"logps/chosen": -2196.85498046875, |
|
"logps/rejected": -1773.3756103515625, |
|
"loss": 0.0627, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": 0.00044371531112119555, |
|
"rewards/margins": 9.080490417545661e-05, |
|
"rewards/rejected": 0.00035291039966978133, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.853403141361258e-07, |
|
"logits/chosen": -0.2191818505525589, |
|
"logits/rejected": -0.22062306106090546, |
|
"logps/chosen": -2141.364501953125, |
|
"logps/rejected": -1710.662353515625, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0025672917254269123, |
|
"rewards/margins": 0.0005076726665720344, |
|
"rewards/rejected": 0.0020596194081008434, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0471204188481676e-06, |
|
"logits/chosen": -0.2520692050457001, |
|
"logits/rejected": -0.22583802044391632, |
|
"logps/chosen": -2189.7646484375, |
|
"logps/rejected": -1715.2425537109375, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00766522204503417, |
|
"rewards/margins": 0.0016571322921663523, |
|
"rewards/rejected": 0.0060080899856984615, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3089005235602096e-06, |
|
"logits/chosen": -0.17123639583587646, |
|
"logits/rejected": -0.19555726647377014, |
|
"logps/chosen": -2526.5703125, |
|
"logps/rejected": -2165.141845703125, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.02015666291117668, |
|
"rewards/margins": 0.0033235768787562847, |
|
"rewards/rejected": 0.01683308556675911, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5706806282722515e-06, |
|
"logits/chosen": -0.18598869442939758, |
|
"logits/rejected": -0.20677652955055237, |
|
"logps/chosen": -2151.3115234375, |
|
"logps/rejected": -1970.6624755859375, |
|
"loss": 0.0505, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.029178302735090256, |
|
"rewards/margins": 0.0026255736593157053, |
|
"rewards/rejected": 0.026552731171250343, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8324607329842933e-06, |
|
"logits/chosen": -0.18310071527957916, |
|
"logits/rejected": -0.20503754913806915, |
|
"logps/chosen": -1844.6480712890625, |
|
"logps/rejected": -1762.2308349609375, |
|
"loss": 0.056, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.028074929490685463, |
|
"rewards/margins": 0.001591854146681726, |
|
"rewards/rejected": 0.026483073830604553, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.094240837696335e-06, |
|
"logits/chosen": -0.22824080288410187, |
|
"logits/rejected": -0.24587313830852509, |
|
"logps/chosen": -1901.586181640625, |
|
"logps/rejected": -1624.0626220703125, |
|
"loss": 0.064, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.031114792451262474, |
|
"rewards/margins": 0.005912109278142452, |
|
"rewards/rejected": 0.025202685967087746, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.356020942408377e-06, |
|
"logits/chosen": -0.2366272509098053, |
|
"logits/rejected": -0.22877153754234314, |
|
"logps/chosen": -1691.4013671875, |
|
"logps/rejected": -1524.5679931640625, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.035714153200387955, |
|
"rewards/margins": 0.0030426979064941406, |
|
"rewards/rejected": 0.032671455293893814, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.617801047120419e-06, |
|
"logits/chosen": -0.22739839553833008, |
|
"logits/rejected": -0.24034900963306427, |
|
"logps/chosen": -2141.99365234375, |
|
"logps/rejected": -2006.7513427734375, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.038611847907304764, |
|
"rewards/margins": 0.0053280796855688095, |
|
"rewards/rejected": 0.033283766359090805, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -0.25320005416870117, |
|
"eval_logits/rejected": -0.25199252367019653, |
|
"eval_logps/chosen": -2183.76953125, |
|
"eval_logps/rejected": -1849.702880859375, |
|
"eval_loss": 0.052377186715602875, |
|
"eval_rewards/accuracies": 0.5254999995231628, |
|
"eval_rewards/chosen": 0.03263631835579872, |
|
"eval_rewards/margins": 0.00592681672424078, |
|
"eval_rewards/rejected": 0.026709498837590218, |
|
"eval_runtime": 510.4972, |
|
"eval_samples_per_second": 3.918, |
|
"eval_steps_per_second": 0.979, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8795811518324613e-06, |
|
"logits/chosen": -0.2320372760295868, |
|
"logits/rejected": -0.27123022079467773, |
|
"logps/chosen": -1939.3607177734375, |
|
"logps/rejected": -1764.5439453125, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.027445796877145767, |
|
"rewards/margins": 0.00373500632122159, |
|
"rewards/rejected": 0.023710791021585464, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.141361256544503e-06, |
|
"logits/chosen": -0.2857373058795929, |
|
"logits/rejected": -0.26925256848335266, |
|
"logps/chosen": -2433.180419921875, |
|
"logps/rejected": -2053.70361328125, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.03826409578323364, |
|
"rewards/margins": 0.007337054703384638, |
|
"rewards/rejected": 0.030927041545510292, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.403141361256545e-06, |
|
"logits/chosen": -0.27496081590652466, |
|
"logits/rejected": -0.30028867721557617, |
|
"logps/chosen": -2130.792236328125, |
|
"logps/rejected": -1784.03125, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.05773577094078064, |
|
"rewards/margins": 0.011168297380208969, |
|
"rewards/rejected": 0.04656747728586197, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6649214659685865e-06, |
|
"logits/chosen": -0.31289300322532654, |
|
"logits/rejected": -0.31437715888023376, |
|
"logps/chosen": -2071.06982421875, |
|
"logps/rejected": -1879.8802490234375, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.07026473432779312, |
|
"rewards/margins": 0.007077778223901987, |
|
"rewards/rejected": 0.06318695098161697, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.926701570680629e-06, |
|
"logits/chosen": -0.29269808530807495, |
|
"logits/rejected": -0.3180951476097107, |
|
"logps/chosen": -2014.0640869140625, |
|
"logps/rejected": -1808.185302734375, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.060369331389665604, |
|
"rewards/margins": 0.007043222431093454, |
|
"rewards/rejected": 0.05332610756158829, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.18848167539267e-06, |
|
"logits/chosen": -0.2773135304450989, |
|
"logits/rejected": -0.2673946022987366, |
|
"logps/chosen": -2283.48779296875, |
|
"logps/rejected": -1938.6422119140625, |
|
"loss": 0.0524, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.05114240199327469, |
|
"rewards/margins": 0.008266921155154705, |
|
"rewards/rejected": 0.04287547618150711, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.450261780104713e-06, |
|
"logits/chosen": -0.2700185179710388, |
|
"logits/rejected": -0.26662972569465637, |
|
"logps/chosen": -2404.58984375, |
|
"logps/rejected": -1977.1859130859375, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.05304870009422302, |
|
"rewards/margins": 0.011285845190286636, |
|
"rewards/rejected": 0.04176285117864609, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.712041884816754e-06, |
|
"logits/chosen": -0.2975671887397766, |
|
"logits/rejected": -0.2988983690738678, |
|
"logps/chosen": -2047.671630859375, |
|
"logps/rejected": -1742.282470703125, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.05823253467679024, |
|
"rewards/margins": 0.01046661101281643, |
|
"rewards/rejected": 0.04776592180132866, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9738219895287965e-06, |
|
"logits/chosen": -0.2745932936668396, |
|
"logits/rejected": -0.2855191230773926, |
|
"logps/chosen": -2184.26220703125, |
|
"logps/rejected": -1788.6656494140625, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.06112230569124222, |
|
"rewards/margins": 0.012786999344825745, |
|
"rewards/rejected": 0.04833530634641647, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999661831436499e-06, |
|
"logits/chosen": -0.27325528860092163, |
|
"logits/rejected": -0.2756146490573883, |
|
"logps/chosen": -2187.59130859375, |
|
"logps/rejected": -2025.250732421875, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.05468825250864029, |
|
"rewards/margins": 0.006374381482601166, |
|
"rewards/rejected": 0.04831386357545853, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -0.27396515011787415, |
|
"eval_logits/rejected": -0.2760486304759979, |
|
"eval_logps/chosen": -2172.962890625, |
|
"eval_logps/rejected": -1842.2476806640625, |
|
"eval_loss": 0.051403772085905075, |
|
"eval_rewards/accuracies": 0.5389999747276306, |
|
"eval_rewards/chosen": 0.043442659080028534, |
|
"eval_rewards/margins": 0.009277699515223503, |
|
"eval_rewards/rejected": 0.03416495770215988, |
|
"eval_runtime": 510.5925, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9984929711403395e-06, |
|
"logits/chosen": -0.24565927684307098, |
|
"logits/rejected": -0.24346761405467987, |
|
"logps/chosen": -2105.339111328125, |
|
"logps/rejected": -1993.477294921875, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.03817785158753395, |
|
"rewards/margins": 0.0046168239787220955, |
|
"rewards/rejected": 0.03356102854013443, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996489634487865e-06, |
|
"logits/chosen": -0.2854730486869812, |
|
"logits/rejected": -0.27373185753822327, |
|
"logps/chosen": -2071.35595703125, |
|
"logps/rejected": -1617.6314697265625, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.03769981488585472, |
|
"rewards/margins": 0.012330549769103527, |
|
"rewards/rejected": 0.02536926604807377, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9936524905772466e-06, |
|
"logits/chosen": -0.2610529661178589, |
|
"logits/rejected": -0.28053849935531616, |
|
"logps/chosen": -1956.2564697265625, |
|
"logps/rejected": -1615.5814208984375, |
|
"loss": 0.0735, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.04565655067563057, |
|
"rewards/margins": 0.011204726994037628, |
|
"rewards/rejected": 0.03445183113217354, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9899824869915e-06, |
|
"logits/chosen": -0.24108798801898956, |
|
"logits/rejected": -0.2399587333202362, |
|
"logps/chosen": -1775.907470703125, |
|
"logps/rejected": -1713.854736328125, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03222992643713951, |
|
"rewards/margins": 0.008830582723021507, |
|
"rewards/rejected": 0.023399341851472855, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985480849482012e-06, |
|
"logits/chosen": -0.20024847984313965, |
|
"logits/rejected": -0.22306282818317413, |
|
"logps/chosen": -2255.089599609375, |
|
"logps/rejected": -1934.8642578125, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.030694425106048584, |
|
"rewards/margins": 0.01082837488502264, |
|
"rewards/rejected": 0.01986604928970337, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980149081559142e-06, |
|
"logits/chosen": -0.21732480823993683, |
|
"logits/rejected": -0.24718734622001648, |
|
"logps/chosen": -1957.7998046875, |
|
"logps/rejected": -1881.0550537109375, |
|
"loss": 0.056, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.05905503034591675, |
|
"rewards/margins": 0.0030602319166064262, |
|
"rewards/rejected": 0.05599479004740715, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9739889639900655e-06, |
|
"logits/chosen": -0.24414131045341492, |
|
"logits/rejected": -0.22118325531482697, |
|
"logps/chosen": -1925.445556640625, |
|
"logps/rejected": -1909.0667724609375, |
|
"loss": 0.0539, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.05300917103886604, |
|
"rewards/margins": 0.006324948277324438, |
|
"rewards/rejected": 0.04668421670794487, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967002554204009e-06, |
|
"logits/chosen": -0.25582337379455566, |
|
"logits/rejected": -0.2471769154071808, |
|
"logps/chosen": -2269.031982421875, |
|
"logps/rejected": -2033.6907958984375, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.01642546057701111, |
|
"rewards/margins": 0.0032355361618101597, |
|
"rewards/rejected": 0.013189923949539661, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.959192185605089e-06, |
|
"logits/chosen": -0.30079659819602966, |
|
"logits/rejected": -0.28022244572639465, |
|
"logps/chosen": -1992.6142578125, |
|
"logps/rejected": -1820.0687255859375, |
|
"loss": 0.0584, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.02403336763381958, |
|
"rewards/margins": 0.00603306433185935, |
|
"rewards/rejected": 0.018000302836298943, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.950560466792969e-06, |
|
"logits/chosen": -0.28634804487228394, |
|
"logits/rejected": -0.2918199896812439, |
|
"logps/chosen": -2390.38623046875, |
|
"logps/rejected": -1984.9703369140625, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.035278573632240295, |
|
"rewards/margins": 0.012735734693706036, |
|
"rewards/rejected": 0.022542843595147133, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -0.29014500975608826, |
|
"eval_logits/rejected": -0.28990820050239563, |
|
"eval_logps/chosen": -2182.04541015625, |
|
"eval_logps/rejected": -1851.862060546875, |
|
"eval_loss": 0.05131419003009796, |
|
"eval_rewards/accuracies": 0.5630000233650208, |
|
"eval_rewards/chosen": 0.03436028212308884, |
|
"eval_rewards/margins": 0.009809814393520355, |
|
"eval_rewards/rejected": 0.02455046772956848, |
|
"eval_runtime": 510.7215, |
|
"eval_samples_per_second": 3.916, |
|
"eval_steps_per_second": 0.979, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9411102806916185e-06, |
|
"logits/chosen": -0.2964246869087219, |
|
"logits/rejected": -0.3249427080154419, |
|
"logps/chosen": -2153.874267578125, |
|
"logps/rejected": -1754.1324462890625, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.04025361314415932, |
|
"rewards/margins": 0.009616317227482796, |
|
"rewards/rejected": 0.03063729964196682, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.930844783586424e-06, |
|
"logits/chosen": -0.26167505979537964, |
|
"logits/rejected": -0.2782900929450989, |
|
"logps/chosen": -2090.10986328125, |
|
"logps/rejected": -1866.400146484375, |
|
"loss": 0.0581, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.044223010540008545, |
|
"rewards/margins": 0.0066053010523319244, |
|
"rewards/rejected": 0.03761770576238632, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.919767404070033e-06, |
|
"logits/chosen": -0.2866571545600891, |
|
"logits/rejected": -0.2904338836669922, |
|
"logps/chosen": -2089.8603515625, |
|
"logps/rejected": -1703.691650390625, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0637887567281723, |
|
"rewards/margins": 0.019382018595933914, |
|
"rewards/rejected": 0.04440673440694809, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.907881841897216e-06, |
|
"logits/chosen": -0.2776980698108673, |
|
"logits/rejected": -0.2663383185863495, |
|
"logps/chosen": -1941.0628662109375, |
|
"logps/rejected": -1724.725830078125, |
|
"loss": 0.057, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.07600688189268112, |
|
"rewards/margins": 0.014780363067984581, |
|
"rewards/rejected": 0.06122652441263199, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.89519206674919e-06, |
|
"logits/chosen": -0.28663453459739685, |
|
"logits/rejected": -0.2781517803668976, |
|
"logps/chosen": -2123.11865234375, |
|
"logps/rejected": -1684.65625, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.07864506542682648, |
|
"rewards/margins": 0.023925408720970154, |
|
"rewards/rejected": 0.05471965670585632, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.881702316907769e-06, |
|
"logits/chosen": -0.2786110043525696, |
|
"logits/rejected": -0.2901211082935333, |
|
"logps/chosen": -2082.64208984375, |
|
"logps/rejected": -1863.649169921875, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.052565790712833405, |
|
"rewards/margins": 0.014038707129657269, |
|
"rewards/rejected": 0.038527075201272964, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.86741709783982e-06, |
|
"logits/chosen": -0.34627729654312134, |
|
"logits/rejected": -0.33580657839775085, |
|
"logps/chosen": -1979.3060302734375, |
|
"logps/rejected": -1685.088134765625, |
|
"loss": 0.0563, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.017150847241282463, |
|
"rewards/margins": 0.008625769056379795, |
|
"rewards/rejected": 0.008525079116225243, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.852341180692471e-06, |
|
"logits/chosen": -0.28853368759155273, |
|
"logits/rejected": -0.33309391140937805, |
|
"logps/chosen": -2051.138671875, |
|
"logps/rejected": -1604.300537109375, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.03238735720515251, |
|
"rewards/margins": 0.011523480527102947, |
|
"rewards/rejected": 0.02086387760937214, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.836479600699579e-06, |
|
"logits/chosen": -0.2653834819793701, |
|
"logits/rejected": -0.27924028038978577, |
|
"logps/chosen": -2167.791748046875, |
|
"logps/rejected": -1883.7181396484375, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.07641658931970596, |
|
"rewards/margins": 0.014170339331030846, |
|
"rewards/rejected": 0.06224624067544937, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.819837655500014e-06, |
|
"logits/chosen": -0.23558492958545685, |
|
"logits/rejected": -0.252250611782074, |
|
"logps/chosen": -2008.2281494140625, |
|
"logps/rejected": -1735.037109375, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.08481944352388382, |
|
"rewards/margins": 0.018725356087088585, |
|
"rewards/rejected": 0.06609407812356949, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -0.26833415031433105, |
|
"eval_logits/rejected": -0.27769944071769714, |
|
"eval_logps/chosen": -2134.577880859375, |
|
"eval_logps/rejected": -1810.503662109375, |
|
"eval_loss": 0.052033666521310806, |
|
"eval_rewards/accuracies": 0.5249999761581421, |
|
"eval_rewards/chosen": 0.08182776719331741, |
|
"eval_rewards/margins": 0.01591898687183857, |
|
"eval_rewards/rejected": 0.06590878218412399, |
|
"eval_runtime": 510.467, |
|
"eval_samples_per_second": 3.918, |
|
"eval_steps_per_second": 0.979, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.802420903368286e-06, |
|
"logits/chosen": -0.22416555881500244, |
|
"logits/rejected": -0.23775295913219452, |
|
"logps/chosen": -2305.072265625, |
|
"logps/rejected": -2017.150390625, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.07892463356256485, |
|
"rewards/margins": 0.010944006033241749, |
|
"rewards/rejected": 0.06798062473535538, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.784235161358124e-06, |
|
"logits/chosen": -0.24204190075397491, |
|
"logits/rejected": -0.24225695431232452, |
|
"logps/chosen": -1825.3125, |
|
"logps/rejected": -1693.0045166015625, |
|
"loss": 0.0497, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.05524778366088867, |
|
"rewards/margins": 0.006465147249400616, |
|
"rewards/rejected": 0.048782628029584885, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.765286503359632e-06, |
|
"logits/chosen": -0.23344504833221436, |
|
"logits/rejected": -0.27365198731422424, |
|
"logps/chosen": -2049.459716796875, |
|
"logps/rejected": -1840.787841796875, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.04559114947915077, |
|
"rewards/margins": 0.006585550494492054, |
|
"rewards/rejected": 0.039005596190690994, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.745581258070654e-06, |
|
"logits/chosen": -0.27591726183891296, |
|
"logits/rejected": -0.2608277499675751, |
|
"logps/chosen": -1806.8870849609375, |
|
"logps/rejected": -1811.4437255859375, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.033870596438646317, |
|
"rewards/margins": 0.0030527892522513866, |
|
"rewards/rejected": 0.030817802995443344, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.725126006883047e-06, |
|
"logits/chosen": -0.2728896141052246, |
|
"logits/rejected": -0.2633044123649597, |
|
"logps/chosen": -2298.3818359375, |
|
"logps/rejected": -2048.328125, |
|
"loss": 0.052, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.027686957269906998, |
|
"rewards/margins": 0.0040281787514686584, |
|
"rewards/rejected": 0.02365877851843834, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.70392758168454e-06, |
|
"logits/chosen": -0.2538016438484192, |
|
"logits/rejected": -0.25012341141700745, |
|
"logps/chosen": -2255.5146484375, |
|
"logps/rejected": -1954.8531494140625, |
|
"loss": 0.0536, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.03767388314008713, |
|
"rewards/margins": 0.008208373561501503, |
|
"rewards/rejected": 0.029465511441230774, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68199306257695e-06, |
|
"logits/chosen": -0.2599068284034729, |
|
"logits/rejected": -0.26421061158180237, |
|
"logps/chosen": -2130.776123046875, |
|
"logps/rejected": -1925.4456787109375, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.05753815174102783, |
|
"rewards/margins": 0.011079727672040462, |
|
"rewards/rejected": 0.04645842686295509, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.659329775511478e-06, |
|
"logits/chosen": -0.27710121870040894, |
|
"logits/rejected": -0.2857569754123688, |
|
"logps/chosen": -2018.772705078125, |
|
"logps/rejected": -1903.8472900390625, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.07305508106946945, |
|
"rewards/margins": 0.009427006356418133, |
|
"rewards/rejected": 0.06362807750701904, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.635945289841902e-06, |
|
"logits/chosen": -0.28116849064826965, |
|
"logits/rejected": -0.2983720004558563, |
|
"logps/chosen": -1921.1497802734375, |
|
"logps/rejected": -1723.8843994140625, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0624106340110302, |
|
"rewards/margins": 0.009529463946819305, |
|
"rewards/rejected": 0.05288117378950119, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.611847415796476e-06, |
|
"logits/chosen": -0.27481353282928467, |
|
"logits/rejected": -0.29158735275268555, |
|
"logps/chosen": -2325.54345703125, |
|
"logps/rejected": -2043.1536865234375, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0638991966843605, |
|
"rewards/margins": 0.01127773616462946, |
|
"rewards/rejected": 0.052621446549892426, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -0.29030030965805054, |
|
"eval_logits/rejected": -0.29912662506103516, |
|
"eval_logps/chosen": -2155.4169921875, |
|
"eval_logps/rejected": -1828.8736572265625, |
|
"eval_loss": 0.05023103952407837, |
|
"eval_rewards/accuracies": 0.5625, |
|
"eval_rewards/chosen": 0.060988761484622955, |
|
"eval_rewards/margins": 0.013449816033244133, |
|
"eval_rewards/rejected": 0.04753894358873367, |
|
"eval_runtime": 510.5382, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.587044201869378e-06, |
|
"logits/chosen": -0.2749403417110443, |
|
"logits/rejected": -0.28757306933403015, |
|
"logps/chosen": -2167.8203125, |
|
"logps/rejected": -1664.1771240234375, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.06645651906728745, |
|
"rewards/margins": 0.021903514862060547, |
|
"rewards/rejected": 0.0445530042052269, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.561543932132574e-06, |
|
"logits/chosen": -0.3093597888946533, |
|
"logits/rejected": -0.3130527138710022, |
|
"logps/chosen": -2028.697509765625, |
|
"logps/rejected": -1775.0302734375, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.07832999527454376, |
|
"rewards/margins": 0.014924841932952404, |
|
"rewards/rejected": 0.06340514868497849, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.535355123469009e-06, |
|
"logits/chosen": -0.32513946294784546, |
|
"logits/rejected": -0.34443390369415283, |
|
"logps/chosen": -2135.48974609375, |
|
"logps/rejected": -1824.90625, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.06139357015490532, |
|
"rewards/margins": 0.012111430056393147, |
|
"rewards/rejected": 0.04928214102983475, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508486522728037e-06, |
|
"logits/chosen": -0.34302735328674316, |
|
"logits/rejected": -0.36917632818222046, |
|
"logps/chosen": -2007.6627197265625, |
|
"logps/rejected": -1699.0699462890625, |
|
"loss": 0.0676, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.04311789572238922, |
|
"rewards/margins": 0.012735480442643166, |
|
"rewards/rejected": 0.030382419005036354, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.480947103804044e-06, |
|
"logits/chosen": -0.35971927642822266, |
|
"logits/rejected": -0.36432451009750366, |
|
"logps/chosen": -2163.0068359375, |
|
"logps/rejected": -2066.22509765625, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.04490477591753006, |
|
"rewards/margins": 0.006312023848295212, |
|
"rewards/rejected": 0.03859275206923485, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.452746064639239e-06, |
|
"logits/chosen": -0.38384127616882324, |
|
"logits/rejected": -0.3922134339809418, |
|
"logps/chosen": -2226.274658203125, |
|
"logps/rejected": -1989.887451171875, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.057973384857177734, |
|
"rewards/margins": 0.015295244753360748, |
|
"rewards/rejected": 0.042678140103816986, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.423892824151617e-06, |
|
"logits/chosen": -0.37657466530799866, |
|
"logits/rejected": -0.38766008615493774, |
|
"logps/chosen": -1836.3118896484375, |
|
"logps/rejected": -1472.295654296875, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.07644511014223099, |
|
"rewards/margins": 0.02132570371031761, |
|
"rewards/rejected": 0.05511941760778427, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3943970190891164e-06, |
|
"logits/chosen": -0.37011387944221497, |
|
"logits/rejected": -0.42118391394615173, |
|
"logps/chosen": -2419.860107421875, |
|
"logps/rejected": -1769.7777099609375, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.12023104727268219, |
|
"rewards/margins": 0.03250167518854141, |
|
"rewards/rejected": 0.08772937208414078, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.364268500811025e-06, |
|
"logits/chosen": -0.35418859124183655, |
|
"logits/rejected": -0.37661364674568176, |
|
"logps/chosen": -1887.2279052734375, |
|
"logps/rejected": -1624.3062744140625, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.08441803604364395, |
|
"rewards/margins": 0.01872970722615719, |
|
"rewards/rejected": 0.06568832695484161, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.333517331997704e-06, |
|
"logits/chosen": -0.36238303780555725, |
|
"logits/rejected": -0.36792057752609253, |
|
"logps/chosen": -1933.2572021484375, |
|
"logps/rejected": -1661.876953125, |
|
"loss": 0.0546, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.06127943471074104, |
|
"rewards/margins": 0.016731832176446915, |
|
"rewards/rejected": 0.044547609984874725, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -0.37191054224967957, |
|
"eval_logits/rejected": -0.38397690653800964, |
|
"eval_logps/chosen": -2167.108642578125, |
|
"eval_logps/rejected": -1839.52685546875, |
|
"eval_loss": 0.05038134753704071, |
|
"eval_rewards/accuracies": 0.5525000095367432, |
|
"eval_rewards/chosen": 0.049297019839286804, |
|
"eval_rewards/margins": 0.01241131592541933, |
|
"eval_rewards/rejected": 0.03688570857048035, |
|
"eval_runtime": 510.5837, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.302153783289737e-06, |
|
"logits/chosen": -0.3634631633758545, |
|
"logits/rejected": -0.37499555945396423, |
|
"logps/chosen": -2023.001220703125, |
|
"logps/rejected": -1739.4332275390625, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.041168130934238434, |
|
"rewards/margins": 0.010140376165509224, |
|
"rewards/rejected": 0.03102775290608406, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.270188329857613e-06, |
|
"logits/chosen": -0.3298744261264801, |
|
"logits/rejected": -0.32282137870788574, |
|
"logps/chosen": -2020.5091552734375, |
|
"logps/rejected": -1689.3531494140625, |
|
"loss": 0.047, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.050172846764326096, |
|
"rewards/margins": 0.009077770635485649, |
|
"rewards/rejected": 0.041095077991485596, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.237631647903115e-06, |
|
"logits/chosen": -0.321160227060318, |
|
"logits/rejected": -0.34205105900764465, |
|
"logps/chosen": -1793.309326171875, |
|
"logps/rejected": -1498.567626953125, |
|
"loss": 0.049, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0489073321223259, |
|
"rewards/margins": 0.012575352564454079, |
|
"rewards/rejected": 0.03633198142051697, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.204494611093548e-06, |
|
"logits/chosen": -0.32717442512512207, |
|
"logits/rejected": -0.34008845686912537, |
|
"logps/chosen": -1978.6207275390625, |
|
"logps/rejected": -1785.7669677734375, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.05437788367271423, |
|
"rewards/margins": 0.005566168110817671, |
|
"rewards/rejected": 0.048811715096235275, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.170788286930024e-06, |
|
"logits/chosen": -0.3271678388118744, |
|
"logits/rejected": -0.3383072018623352, |
|
"logps/chosen": -2002.5355224609375, |
|
"logps/rejected": -1623.6373291015625, |
|
"loss": 0.0429, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.056448131799697876, |
|
"rewards/margins": 0.013460059650242329, |
|
"rewards/rejected": 0.04298807680606842, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.136523933051005e-06, |
|
"logits/chosen": -0.28324219584465027, |
|
"logits/rejected": -0.2753041982650757, |
|
"logps/chosen": -1772.493896484375, |
|
"logps/rejected": -1581.4808349609375, |
|
"loss": 0.047, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.04456415772438049, |
|
"rewards/margins": 0.007894165813922882, |
|
"rewards/rejected": 0.03666999563574791, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.101712993472348e-06, |
|
"logits/chosen": -0.286260187625885, |
|
"logits/rejected": -0.3045397698879242, |
|
"logps/chosen": -1830.4456787109375, |
|
"logps/rejected": -1603.759521484375, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.05162501335144043, |
|
"rewards/margins": 0.011338387615978718, |
|
"rewards/rejected": 0.040286630392074585, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.066367094765091e-06, |
|
"logits/chosen": -0.2880704998970032, |
|
"logits/rejected": -0.2942127585411072, |
|
"logps/chosen": -2038.3916015625, |
|
"logps/rejected": -1857.885498046875, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.06582482159137726, |
|
"rewards/margins": 0.009796356782317162, |
|
"rewards/rejected": 0.05602846294641495, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.030498042172277e-06, |
|
"logits/chosen": -0.29781144857406616, |
|
"logits/rejected": -0.3116939663887024, |
|
"logps/chosen": -2132.72802734375, |
|
"logps/rejected": -1934.0364990234375, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.06504924595355988, |
|
"rewards/margins": 0.00774806085973978, |
|
"rewards/rejected": 0.05730118602514267, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.994117815666095e-06, |
|
"logits/chosen": -0.3007664084434509, |
|
"logits/rejected": -0.29853954911231995, |
|
"logps/chosen": -1988.636962890625, |
|
"logps/rejected": -1707.418212890625, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.05466890335083008, |
|
"rewards/margins": 0.013832475058734417, |
|
"rewards/rejected": 0.040836431086063385, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -0.3144506812095642, |
|
"eval_logits/rejected": -0.3237921893596649, |
|
"eval_logps/chosen": -2163.694091796875, |
|
"eval_logps/rejected": -1836.1396484375, |
|
"eval_loss": 0.05007108300924301, |
|
"eval_rewards/accuracies": 0.5669999718666077, |
|
"eval_rewards/chosen": 0.052711814641952515, |
|
"eval_rewards/margins": 0.012439063750207424, |
|
"eval_rewards/rejected": 0.04027275741100311, |
|
"eval_runtime": 510.3528, |
|
"eval_samples_per_second": 3.919, |
|
"eval_steps_per_second": 0.98, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.957238565946672e-06, |
|
"logits/chosen": -0.28171759843826294, |
|
"logits/rejected": -0.3016406297683716, |
|
"logps/chosen": -1951.7197265625, |
|
"logps/rejected": -1821.9302978515625, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.046342235058546066, |
|
"rewards/margins": 0.00639796257019043, |
|
"rewards/rejected": 0.03994427248835564, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.919872610383831e-06, |
|
"logits/chosen": -0.30082041025161743, |
|
"logits/rejected": -0.3195782005786896, |
|
"logps/chosen": -2009.2193603515625, |
|
"logps/rejected": -1790.225830078125, |
|
"loss": 0.0684, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.04109364002943039, |
|
"rewards/margins": 0.007526120636612177, |
|
"rewards/rejected": 0.03356752544641495, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.882032428903195e-06, |
|
"logits/chosen": -0.3266572058200836, |
|
"logits/rejected": -0.3410620093345642, |
|
"logps/chosen": -2097.94140625, |
|
"logps/rejected": -1642.9635009765625, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.06069540977478027, |
|
"rewards/margins": 0.01798270270228386, |
|
"rewards/rejected": 0.042712707072496414, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.84373065981799e-06, |
|
"logits/chosen": -0.29377710819244385, |
|
"logits/rejected": -0.2976624369621277, |
|
"logps/chosen": -2122.676513671875, |
|
"logps/rejected": -1956.9495849609375, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0653495341539383, |
|
"rewards/margins": 0.013613177463412285, |
|
"rewards/rejected": 0.051736362278461456, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8049800956079552e-06, |
|
"logits/chosen": -0.33634868264198303, |
|
"logits/rejected": -0.3460080027580261, |
|
"logps/chosen": -1977.577392578125, |
|
"logps/rejected": -1716.836669921875, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.05900438502430916, |
|
"rewards/margins": 0.01787043735384941, |
|
"rewards/rejected": 0.04113394767045975, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.765793678646753e-06, |
|
"logits/chosen": -0.3246403634548187, |
|
"logits/rejected": -0.3240343928337097, |
|
"logps/chosen": -2022.0374755859375, |
|
"logps/rejected": -1934.3929443359375, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.056883443146944046, |
|
"rewards/margins": 0.010135297663509846, |
|
"rewards/rejected": 0.046748142689466476, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.726184496879323e-06, |
|
"logits/chosen": -0.32194751501083374, |
|
"logits/rejected": -0.3437530994415283, |
|
"logps/chosen": -2066.994873046875, |
|
"logps/rejected": -1785.517333984375, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.06701908260583878, |
|
"rewards/margins": 0.017347043380141258, |
|
"rewards/rejected": 0.04967203736305237, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.686165779450619e-06, |
|
"logits/chosen": -0.32135313749313354, |
|
"logits/rejected": -0.33263832330703735, |
|
"logps/chosen": -2046.4375, |
|
"logps/rejected": -1752.5133056640625, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.07863454520702362, |
|
"rewards/margins": 0.013034949079155922, |
|
"rewards/rejected": 0.06559960544109344, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.645750892287178e-06, |
|
"logits/chosen": -0.30609697103500366, |
|
"logits/rejected": -0.3328899145126343, |
|
"logps/chosen": -2209.924560546875, |
|
"logps/rejected": -1803.526123046875, |
|
"loss": 0.06, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.08860823512077332, |
|
"rewards/margins": 0.02287045121192932, |
|
"rewards/rejected": 0.065737783908844, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604953333633009e-06, |
|
"logits/chosen": -0.301249623298645, |
|
"logits/rejected": -0.3167082369327545, |
|
"logps/chosen": -1958.112548828125, |
|
"logps/rejected": -1749.8187255859375, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.0594819113612175, |
|
"rewards/margins": 0.00925761554390192, |
|
"rewards/rejected": 0.050224293023347855, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -0.2990359365940094, |
|
"eval_logits/rejected": -0.3079277575016022, |
|
"eval_logps/chosen": -2160.533447265625, |
|
"eval_logps/rejected": -1833.001220703125, |
|
"eval_loss": 0.05018917843699455, |
|
"eval_rewards/accuracies": 0.5625, |
|
"eval_rewards/chosen": 0.055872511118650436, |
|
"eval_rewards/margins": 0.012461244128644466, |
|
"eval_rewards/rejected": 0.043411269783973694, |
|
"eval_runtime": 510.4542, |
|
"eval_samples_per_second": 3.918, |
|
"eval_steps_per_second": 0.98, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.56378672954129e-06, |
|
"logits/chosen": -0.2567403316497803, |
|
"logits/rejected": -0.3088562786579132, |
|
"logps/chosen": -1969.7041015625, |
|
"logps/rejected": -1587.13134765625, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.05050656199455261, |
|
"rewards/margins": 0.01574171707034111, |
|
"rewards/rejected": 0.0347648449242115, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5222648293233806e-06, |
|
"logits/chosen": -0.3206945061683655, |
|
"logits/rejected": -0.32324275374412537, |
|
"logps/chosen": -2125.584228515625, |
|
"logps/rejected": -1908.9595947265625, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.07730694115161896, |
|
"rewards/margins": 0.020585492253303528, |
|
"rewards/rejected": 0.05672144889831543, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4804015009566573e-06, |
|
"logits/chosen": -0.30177921056747437, |
|
"logits/rejected": -0.30555492639541626, |
|
"logps/chosen": -2047.0084228515625, |
|
"logps/rejected": -1866.709228515625, |
|
"loss": 0.0529, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.080367811024189, |
|
"rewards/margins": 0.01644848845899105, |
|
"rewards/rejected": 0.0639193207025528, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4382107264527244e-06, |
|
"logits/chosen": -0.2914479076862335, |
|
"logits/rejected": -0.3034920394420624, |
|
"logps/chosen": -2094.360595703125, |
|
"logps/rejected": -1812.98046875, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.08005829900503159, |
|
"rewards/margins": 0.010312746278941631, |
|
"rewards/rejected": 0.06974555552005768, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.3957065971875387e-06, |
|
"logits/chosen": -0.3109249472618103, |
|
"logits/rejected": -0.32668763399124146, |
|
"logps/chosen": -2224.466796875, |
|
"logps/rejected": -1824.3785400390625, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.06907899677753448, |
|
"rewards/margins": 0.012219742871820927, |
|
"rewards/rejected": 0.056859247386455536, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.352903309194999e-06, |
|
"logits/chosen": -0.29552769660949707, |
|
"logits/rejected": -0.30279669165611267, |
|
"logps/chosen": -2010.127685546875, |
|
"logps/rejected": -1726.2581787109375, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.05864205211400986, |
|
"rewards/margins": 0.011820727959275246, |
|
"rewards/rejected": 0.04682133346796036, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.309815158425591e-06, |
|
"logits/chosen": -0.30413001775741577, |
|
"logits/rejected": -0.317624032497406, |
|
"logps/chosen": -2200.095947265625, |
|
"logps/rejected": -1815.937744140625, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.07513566315174103, |
|
"rewards/margins": 0.01764606684446335, |
|
"rewards/rejected": 0.057489603757858276, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.266456535971654e-06, |
|
"logits/chosen": -0.2624972462654114, |
|
"logits/rejected": -0.28810930252075195, |
|
"logps/chosen": -2114.169189453125, |
|
"logps/rejected": -1792.790771484375, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0786682516336441, |
|
"rewards/margins": 0.01509961299598217, |
|
"rewards/rejected": 0.06356863677501678, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2228419232608692e-06, |
|
"logits/chosen": -0.2368161380290985, |
|
"logits/rejected": -0.24519118666648865, |
|
"logps/chosen": -1967.4462890625, |
|
"logps/rejected": -1798.807373046875, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.07654932141304016, |
|
"rewards/margins": 0.007422330789268017, |
|
"rewards/rejected": 0.06912699341773987, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1789858872195888e-06, |
|
"logits/chosen": -0.21885935962200165, |
|
"logits/rejected": -0.24373655021190643, |
|
"logps/chosen": -2283.5390625, |
|
"logps/rejected": -1840.8265380859375, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.09643899649381638, |
|
"rewards/margins": 0.018616409972310066, |
|
"rewards/rejected": 0.07782258838415146, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -0.24547961354255676, |
|
"eval_logits/rejected": -0.25286465883255005, |
|
"eval_logps/chosen": -2129.181884765625, |
|
"eval_logps/rejected": -1806.208740234375, |
|
"eval_loss": 0.050007544457912445, |
|
"eval_rewards/accuracies": 0.5485000014305115, |
|
"eval_rewards/chosen": 0.0872238427400589, |
|
"eval_rewards/margins": 0.017020048573613167, |
|
"eval_rewards/rejected": 0.07020379602909088, |
|
"eval_runtime": 510.5362, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1349030754075945e-06, |
|
"logits/chosen": -0.22288069128990173, |
|
"logits/rejected": -0.2447211742401123, |
|
"logps/chosen": -2140.885498046875, |
|
"logps/rejected": -1654.9674072265625, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.09542791545391083, |
|
"rewards/margins": 0.02674751542508602, |
|
"rewards/rejected": 0.06868041306734085, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0906082111259313e-06, |
|
"logits/chosen": -0.2237463891506195, |
|
"logits/rejected": -0.2500147521495819, |
|
"logps/chosen": -2403.999267578125, |
|
"logps/rejected": -1815.796142578125, |
|
"loss": 0.0436, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.09730223566293716, |
|
"rewards/margins": 0.025161966681480408, |
|
"rewards/rejected": 0.07214026153087616, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.046116088499449e-06, |
|
"logits/chosen": -0.2534050941467285, |
|
"logits/rejected": -0.27334827184677124, |
|
"logps/chosen": -2099.66064453125, |
|
"logps/rejected": -1671.605712890625, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.09167732298374176, |
|
"rewards/margins": 0.01841827854514122, |
|
"rewards/rejected": 0.07325904071331024, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0014415675356813e-06, |
|
"logits/chosen": -0.24992087483406067, |
|
"logits/rejected": -0.2547626495361328, |
|
"logps/chosen": -2133.786376953125, |
|
"logps/rejected": -1852.0260009765625, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.09754703938961029, |
|
"rewards/margins": 0.015503397211432457, |
|
"rewards/rejected": 0.08204366266727448, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9565995691617242e-06, |
|
"logits/chosen": -0.23162353038787842, |
|
"logits/rejected": -0.24423262476921082, |
|
"logps/chosen": -1878.375732421875, |
|
"logps/rejected": -1515.8773193359375, |
|
"loss": 0.047, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.07727902382612228, |
|
"rewards/margins": 0.018643613904714584, |
|
"rewards/rejected": 0.058635413646698, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9116050702407706e-06, |
|
"logits/chosen": -0.2648778259754181, |
|
"logits/rejected": -0.2825019359588623, |
|
"logps/chosen": -2200.065185546875, |
|
"logps/rejected": -1777.919677734375, |
|
"loss": 0.0385, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.07928620278835297, |
|
"rewards/margins": 0.01838754117488861, |
|
"rewards/rejected": 0.06089866906404495, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8664730985699537e-06, |
|
"logits/chosen": -0.23890802264213562, |
|
"logits/rejected": -0.2561323344707489, |
|
"logps/chosen": -2259.957763671875, |
|
"logps/rejected": -1922.253173828125, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.06860624998807907, |
|
"rewards/margins": 0.010691315867006779, |
|
"rewards/rejected": 0.05791493132710457, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8212187278611907e-06, |
|
"logits/chosen": -0.2580435872077942, |
|
"logits/rejected": -0.2608950734138489, |
|
"logps/chosen": -2114.46044921875, |
|
"logps/rejected": -1843.2421875, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.060202427208423615, |
|
"rewards/margins": 0.0087806461378932, |
|
"rewards/rejected": 0.05142177268862724, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7758570727066843e-06, |
|
"logits/chosen": -0.26601457595825195, |
|
"logits/rejected": -0.27015531063079834, |
|
"logps/chosen": -1853.6099853515625, |
|
"logps/rejected": -1555.1754150390625, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.060611844062805176, |
|
"rewards/margins": 0.013232124969363213, |
|
"rewards/rejected": 0.047379713505506516, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.730403283530767e-06, |
|
"logits/chosen": -0.24036483466625214, |
|
"logits/rejected": -0.2455415278673172, |
|
"logps/chosen": -1859.8070068359375, |
|
"logps/rejected": -1691.760498046875, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.06643722951412201, |
|
"rewards/margins": 0.01359983254224062, |
|
"rewards/rejected": 0.052837394177913666, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -0.2564674913883209, |
|
"eval_logits/rejected": -0.25929296016693115, |
|
"eval_logps/chosen": -2156.65283203125, |
|
"eval_logps/rejected": -1829.5831298828125, |
|
"eval_loss": 0.04961266368627548, |
|
"eval_rewards/accuracies": 0.5649999976158142, |
|
"eval_rewards/chosen": 0.05975308269262314, |
|
"eval_rewards/margins": 0.012923642992973328, |
|
"eval_rewards/rejected": 0.04682943597435951, |
|
"eval_runtime": 510.5574, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6848725415297888e-06, |
|
"logits/chosen": -0.251176655292511, |
|
"logits/rejected": -0.255452036857605, |
|
"logps/chosen": -2234.47119140625, |
|
"logps/rejected": -1848.127685546875, |
|
"loss": 0.0405, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.06075858324766159, |
|
"rewards/margins": 0.016493605449795723, |
|
"rewards/rejected": 0.04426497966051102, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.639280053601719e-06, |
|
"logits/chosen": -0.2578621506690979, |
|
"logits/rejected": -0.26309382915496826, |
|
"logps/chosen": -2132.969970703125, |
|
"logps/rejected": -1790.293212890625, |
|
"loss": 0.034, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.06318069994449615, |
|
"rewards/margins": 0.01207827776670456, |
|
"rewards/rejected": 0.0511024184525013, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.59364104726716e-06, |
|
"logits/chosen": -0.25486692786216736, |
|
"logits/rejected": -0.24112336337566376, |
|
"logps/chosen": -1739.303955078125, |
|
"logps/rejected": -1716.464599609375, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.053387343883514404, |
|
"rewards/margins": 0.00615662382915616, |
|
"rewards/rejected": 0.04723071679472923, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.547970765583491e-06, |
|
"logits/chosen": -0.23829662799835205, |
|
"logits/rejected": -0.2544878125190735, |
|
"logps/chosen": -2119.11474609375, |
|
"logps/rejected": -1764.7984619140625, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.06005573272705078, |
|
"rewards/margins": 0.01636183261871338, |
|
"rewards/rejected": 0.0436939001083374, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.502284462053799e-06, |
|
"logits/chosen": -0.2548423409461975, |
|
"logits/rejected": -0.24885638058185577, |
|
"logps/chosen": -2136.72998046875, |
|
"logps/rejected": -1792.2359619140625, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0639239102602005, |
|
"rewards/margins": 0.014888137578964233, |
|
"rewards/rejected": 0.04903577268123627, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.456597395532338e-06, |
|
"logits/chosen": -0.2554526925086975, |
|
"logits/rejected": -0.29498496651649475, |
|
"logps/chosen": -1785.2249755859375, |
|
"logps/rejected": -1444.23291015625, |
|
"loss": 0.0467, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.051430024206638336, |
|
"rewards/margins": 0.014165714383125305, |
|
"rewards/rejected": 0.03726430982351303, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4109248251281953e-06, |
|
"logits/chosen": -0.25295186042785645, |
|
"logits/rejected": -0.2443423569202423, |
|
"logps/chosen": -2214.04541015625, |
|
"logps/rejected": -1779.729248046875, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.07067938894033432, |
|
"rewards/margins": 0.0159921832382679, |
|
"rewards/rejected": 0.05468720197677612, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.365282005108875e-06, |
|
"logits/chosen": -0.21839866042137146, |
|
"logits/rejected": -0.22934658825397491, |
|
"logps/chosen": -2245.37646484375, |
|
"logps/rejected": -2051.3115234375, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.07870273292064667, |
|
"rewards/margins": 0.015385419130325317, |
|
"rewards/rejected": 0.06331731379032135, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319684179805491e-06, |
|
"logits/chosen": -0.2654665410518646, |
|
"logits/rejected": -0.2958211302757263, |
|
"logps/chosen": -2201.913818359375, |
|
"logps/rejected": -1623.56298828125, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.08239830285310745, |
|
"rewards/margins": 0.022424213588237762, |
|
"rewards/rejected": 0.05997408553957939, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2741465785212905e-06, |
|
"logits/chosen": -0.24744835495948792, |
|
"logits/rejected": -0.27335745096206665, |
|
"logps/chosen": -2357.655029296875, |
|
"logps/rejected": -1799.583740234375, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.09354601800441742, |
|
"rewards/margins": 0.019701533019542694, |
|
"rewards/rejected": 0.07384449243545532, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -0.23114541172981262, |
|
"eval_logits/rejected": -0.2394075095653534, |
|
"eval_logps/chosen": -2124.1083984375, |
|
"eval_logps/rejected": -1802.593505859375, |
|
"eval_loss": 0.04950037598609924, |
|
"eval_rewards/accuracies": 0.5559999942779541, |
|
"eval_rewards/chosen": 0.0922975018620491, |
|
"eval_rewards/margins": 0.018478482961654663, |
|
"eval_rewards/rejected": 0.07381902635097504, |
|
"eval_runtime": 510.4268, |
|
"eval_samples_per_second": 3.918, |
|
"eval_steps_per_second": 0.98, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2286844104451848e-06, |
|
"logits/chosen": -0.2077624499797821, |
|
"logits/rejected": -0.2412451207637787, |
|
"logps/chosen": -2270.83935546875, |
|
"logps/rejected": -1800.899169921875, |
|
"loss": 0.049, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.09902598708868027, |
|
"rewards/margins": 0.026153406128287315, |
|
"rewards/rejected": 0.0728725865483284, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.183312859572008e-06, |
|
"logits/chosen": -0.20635256171226501, |
|
"logits/rejected": -0.19912874698638916, |
|
"logps/chosen": -2253.06689453125, |
|
"logps/rejected": -1995.773193359375, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.08980433642864227, |
|
"rewards/margins": 0.015359434299170971, |
|
"rewards/rejected": 0.07444489747285843, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1380470796311843e-06, |
|
"logits/chosen": -0.21904154121875763, |
|
"logits/rejected": -0.24687853455543518, |
|
"logps/chosen": -2060.12109375, |
|
"logps/rejected": -1746.0015869140625, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0698038712143898, |
|
"rewards/margins": 0.01698939874768257, |
|
"rewards/rejected": 0.05281447246670723, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.092902189025507e-06, |
|
"logits/chosen": -0.2082248479127884, |
|
"logits/rejected": -0.21504366397857666, |
|
"logps/chosen": -2301.1181640625, |
|
"logps/rejected": -1757.7796630859375, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0717499703168869, |
|
"rewards/margins": 0.022804908454418182, |
|
"rewards/rejected": 0.04894506186246872, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0478932657817105e-06, |
|
"logits/chosen": -0.21141843497753143, |
|
"logits/rejected": -0.2168281078338623, |
|
"logps/chosen": -2182.329345703125, |
|
"logps/rejected": -1772.6962890625, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.07858923077583313, |
|
"rewards/margins": 0.014738768339157104, |
|
"rewards/rejected": 0.06385046243667603, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0030353425145376e-06, |
|
"logits/chosen": -0.21451938152313232, |
|
"logits/rejected": -0.23753699660301208, |
|
"logps/chosen": -2020.727783203125, |
|
"logps/rejected": -1757.3990478515625, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.07901870459318161, |
|
"rewards/margins": 0.01762666180729866, |
|
"rewards/rejected": 0.06139205023646355, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.958343401405964e-06, |
|
"logits/chosen": -0.18361331522464752, |
|
"logits/rejected": -0.1837645322084427, |
|
"logps/chosen": -2371.175537109375, |
|
"logps/rejected": -1958.777099609375, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0850948616862297, |
|
"rewards/margins": 0.020093852654099464, |
|
"rewards/rejected": 0.06500101089477539, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9138323692012734e-06, |
|
"logits/chosen": -0.22541293501853943, |
|
"logits/rejected": -0.23021379113197327, |
|
"logps/chosen": -2021.7099609375, |
|
"logps/rejected": -1991.3853759765625, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.06989626586437225, |
|
"rewards/margins": 0.008970921859145164, |
|
"rewards/rejected": 0.060925353318452835, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8695171122236443e-06, |
|
"logits/chosen": -0.19789089262485504, |
|
"logits/rejected": -0.21101799607276917, |
|
"logps/chosen": -2177.219970703125, |
|
"logps/rejected": -1758.7890625, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.06635448336601257, |
|
"rewards/margins": 0.014402633532881737, |
|
"rewards/rejected": 0.05195184424519539, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8254124314089225e-06, |
|
"logits/chosen": -0.2192670851945877, |
|
"logits/rejected": -0.20262674987316132, |
|
"logps/chosen": -2045.339111328125, |
|
"logps/rejected": -1922.4957275390625, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.06084597855806351, |
|
"rewards/margins": 0.0031062946654856205, |
|
"rewards/rejected": 0.05773968622088432, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -0.21473824977874756, |
|
"eval_logits/rejected": -0.2180851548910141, |
|
"eval_logps/chosen": -2155.742919921875, |
|
"eval_logps/rejected": -1829.7305908203125, |
|
"eval_loss": 0.04951399564743042, |
|
"eval_rewards/accuracies": 0.5684999823570251, |
|
"eval_rewards/chosen": 0.06066294014453888, |
|
"eval_rewards/margins": 0.013980962336063385, |
|
"eval_rewards/rejected": 0.046681977808475494, |
|
"eval_runtime": 510.4546, |
|
"eval_samples_per_second": 3.918, |
|
"eval_steps_per_second": 0.98, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.781533057362221e-06, |
|
"logits/chosen": -0.23097166419029236, |
|
"logits/rejected": -0.2347377985715866, |
|
"logps/chosen": -1948.361328125, |
|
"logps/rejected": -1586.739013671875, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.05206098034977913, |
|
"rewards/margins": 0.012983322143554688, |
|
"rewards/rejected": 0.03907765448093414, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7378936454380277e-06, |
|
"logits/chosen": -0.20886722207069397, |
|
"logits/rejected": -0.21028542518615723, |
|
"logps/chosen": -2190.082763671875, |
|
"logps/rejected": -1998.083984375, |
|
"loss": 0.0517, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.060171376913785934, |
|
"rewards/margins": 0.008495164103806019, |
|
"rewards/rejected": 0.05167621374130249, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6945087708454273e-06, |
|
"logits/chosen": -0.18295393884181976, |
|
"logits/rejected": -0.1880742609500885, |
|
"logps/chosen": -2117.80908203125, |
|
"logps/rejected": -1735.502197265625, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.060192208737134933, |
|
"rewards/margins": 0.009002082981169224, |
|
"rewards/rejected": 0.05119013041257858, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.651392923780105e-06, |
|
"logits/chosen": -0.19351014494895935, |
|
"logits/rejected": -0.20447520911693573, |
|
"logps/chosen": -2093.595703125, |
|
"logps/rejected": -1855.68359375, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.06083123758435249, |
|
"rewards/margins": 0.008342139422893524, |
|
"rewards/rejected": 0.05248909443616867, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.608560504584737e-06, |
|
"logits/chosen": -0.20279578864574432, |
|
"logits/rejected": -0.21171894669532776, |
|
"logps/chosen": -2053.47412109375, |
|
"logps/rejected": -1884.202392578125, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.06217293068766594, |
|
"rewards/margins": 0.01359265111386776, |
|
"rewards/rejected": 0.04858027398586273, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5660258189393945e-06, |
|
"logits/chosen": -0.2138860523700714, |
|
"logits/rejected": -0.20899005234241486, |
|
"logps/chosen": -2356.08447265625, |
|
"logps/rejected": -2091.6435546875, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.06469549238681793, |
|
"rewards/margins": 0.009515106678009033, |
|
"rewards/rejected": 0.055180393159389496, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5238030730835578e-06, |
|
"logits/chosen": -0.22949472069740295, |
|
"logits/rejected": -0.22715874016284943, |
|
"logps/chosen": -2223.537353515625, |
|
"logps/rejected": -1747.806640625, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.06524848937988281, |
|
"rewards/margins": 0.01758180931210518, |
|
"rewards/rejected": 0.047666680067777634, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4819063690713565e-06, |
|
"logits/chosen": -0.19447948038578033, |
|
"logits/rejected": -0.2098011076450348, |
|
"logps/chosen": -1938.2740478515625, |
|
"logps/rejected": -1719.427734375, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.057724129408597946, |
|
"rewards/margins": 0.012677346356213093, |
|
"rewards/rejected": 0.04504678025841713, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4403497000615885e-06, |
|
"logits/chosen": -0.20776407420635223, |
|
"logits/rejected": -0.1970272809267044, |
|
"logps/chosen": -1996.076171875, |
|
"logps/rejected": -1722.409423828125, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.06180752441287041, |
|
"rewards/margins": 0.012057540938258171, |
|
"rewards/rejected": 0.049749989062547684, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3991469456441273e-06, |
|
"logits/chosen": -0.19028015434741974, |
|
"logits/rejected": -0.18771126866340637, |
|
"logps/chosen": -2132.365966796875, |
|
"logps/rejected": -1654.494140625, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.053263597190380096, |
|
"rewards/margins": 0.01780819520354271, |
|
"rewards/rejected": 0.03545539826154709, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -0.21745455265045166, |
|
"eval_logits/rejected": -0.22021788358688354, |
|
"eval_logps/chosen": -2159.675537109375, |
|
"eval_logps/rejected": -1833.5484619140625, |
|
"eval_loss": 0.04945502430200577, |
|
"eval_rewards/accuracies": 0.5690000057220459, |
|
"eval_rewards/chosen": 0.05673002824187279, |
|
"eval_rewards/margins": 0.013866120018064976, |
|
"eval_rewards/rejected": 0.042863909155130386, |
|
"eval_runtime": 510.5607, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3583118672042441e-06, |
|
"logits/chosen": -0.20240898430347443, |
|
"logits/rejected": -0.23169991374015808, |
|
"logps/chosen": -2309.421630859375, |
|
"logps/rejected": -1885.686279296875, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.06426791846752167, |
|
"rewards/margins": 0.016114329919219017, |
|
"rewards/rejected": 0.0481535978615284, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3178581033264218e-06, |
|
"logits/chosen": -0.2012084424495697, |
|
"logits/rejected": -0.23439760506153107, |
|
"logps/chosen": -1958.7874755859375, |
|
"logps/rejected": -1563.35302734375, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.05583029240369797, |
|
"rewards/margins": 0.016821032389998436, |
|
"rewards/rejected": 0.03900925815105438, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2777991652391757e-06, |
|
"logits/chosen": -0.2176096886396408, |
|
"logits/rejected": -0.23196351528167725, |
|
"logps/chosen": -2121.034912109375, |
|
"logps/rejected": -1711.7109375, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.06722841411828995, |
|
"rewards/margins": 0.011516690254211426, |
|
"rewards/rejected": 0.055711716413497925, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2381484323024178e-06, |
|
"logits/chosen": -0.19108158349990845, |
|
"logits/rejected": -0.2023816853761673, |
|
"logps/chosen": -2302.89697265625, |
|
"logps/rejected": -1892.548095703125, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.08529181778430939, |
|
"rewards/margins": 0.020634423941373825, |
|
"rewards/rejected": 0.06465739011764526, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1989191475388518e-06, |
|
"logits/chosen": -0.2374308556318283, |
|
"logits/rejected": -0.2234220951795578, |
|
"logps/chosen": -2113.50146484375, |
|
"logps/rejected": -1874.0279541015625, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.07649590075016022, |
|
"rewards/margins": 0.014174291864037514, |
|
"rewards/rejected": 0.06232162192463875, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160124413210918e-06, |
|
"logits/chosen": -0.24203363060951233, |
|
"logits/rejected": -0.2396487444639206, |
|
"logps/chosen": -2022.40625, |
|
"logps/rejected": -1880.9495849609375, |
|
"loss": 0.0406, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.07169513404369354, |
|
"rewards/margins": 0.015681343153119087, |
|
"rewards/rejected": 0.05601378530263901, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1217771864447396e-06, |
|
"logits/chosen": -0.2442229688167572, |
|
"logits/rejected": -0.2381734549999237, |
|
"logps/chosen": -2145.31689453125, |
|
"logps/rejected": -1823.568359375, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.059424418956041336, |
|
"rewards/margins": 0.017248233780264854, |
|
"rewards/rejected": 0.042176179587841034, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.08389027490255e-06, |
|
"logits/chosen": -0.22975793480873108, |
|
"logits/rejected": -0.2311103641986847, |
|
"logps/chosen": -2078.482421875, |
|
"logps/rejected": -1920.7864990234375, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.05944003537297249, |
|
"rewards/margins": 0.012296736240386963, |
|
"rewards/rejected": 0.04714329540729523, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.046476332505036e-06, |
|
"logits/chosen": -0.22442571818828583, |
|
"logits/rejected": -0.24409636855125427, |
|
"logps/chosen": -1815.8922119140625, |
|
"logps/rejected": -1615.753173828125, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.0425129197537899, |
|
"rewards/margins": 0.005890417378395796, |
|
"rewards/rejected": 0.03662250563502312, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0095478552050348e-06, |
|
"logits/chosen": -0.23266033828258514, |
|
"logits/rejected": -0.22884194552898407, |
|
"logps/chosen": -2215.30078125, |
|
"logps/rejected": -1916.711669921875, |
|
"loss": 0.0524, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.05568776652216911, |
|
"rewards/margins": 0.016158053651452065, |
|
"rewards/rejected": 0.0395297110080719, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -0.24221491813659668, |
|
"eval_logits/rejected": -0.24745041131973267, |
|
"eval_logps/chosen": -2163.659912109375, |
|
"eval_logps/rejected": -1837.5037841796875, |
|
"eval_loss": 0.04962093383073807, |
|
"eval_rewards/accuracies": 0.5684999823570251, |
|
"eval_rewards/chosen": 0.05274572595953941, |
|
"eval_rewards/margins": 0.013837032951414585, |
|
"eval_rewards/rejected": 0.0389086939394474, |
|
"eval_runtime": 510.7706, |
|
"eval_samples_per_second": 3.916, |
|
"eval_steps_per_second": 0.979, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.731171768139808e-07, |
|
"logits/chosen": -0.2232085019350052, |
|
"logits/rejected": -0.2379104197025299, |
|
"logps/chosen": -2600.76953125, |
|
"logps/rejected": -2177.601806640625, |
|
"loss": 0.0462, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.06885553896427155, |
|
"rewards/margins": 0.017164334654808044, |
|
"rewards/rejected": 0.0516912117600441, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.371964648825221e-07, |
|
"logits/chosen": -0.2546294629573822, |
|
"logits/rejected": -0.2645355761051178, |
|
"logps/chosen": -1949.8623046875, |
|
"logps/rejected": -1700.5302734375, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.05458490923047066, |
|
"rewards/margins": 0.015815045684576035, |
|
"rewards/rejected": 0.03876986354589462, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.017977166366445e-07, |
|
"logits/chosen": -0.2653118669986725, |
|
"logits/rejected": -0.26530537009239197, |
|
"logps/chosen": -2068.649169921875, |
|
"logps/rejected": -1767.718017578125, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.065969318151474, |
|
"rewards/margins": 0.021549370139837265, |
|
"rewards/rejected": 0.044419944286346436, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.669327549707096e-07, |
|
"logits/chosen": -0.24164719879627228, |
|
"logits/rejected": -0.2417771816253662, |
|
"logps/chosen": -2113.025390625, |
|
"logps/rejected": -1844.6246337890625, |
|
"loss": 0.037, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.07654988765716553, |
|
"rewards/margins": 0.013212883844971657, |
|
"rewards/rejected": 0.06333700567483902, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.326132244986932e-07, |
|
"logits/chosen": -0.22752514481544495, |
|
"logits/rejected": -0.23347719013690948, |
|
"logps/chosen": -1921.048828125, |
|
"logps/rejected": -1598.9993896484375, |
|
"loss": 0.0546, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.06702348589897156, |
|
"rewards/margins": 0.018387358635663986, |
|
"rewards/rejected": 0.048636119812726974, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.988505876649863e-07, |
|
"logits/chosen": -0.22021660208702087, |
|
"logits/rejected": -0.21698196232318878, |
|
"logps/chosen": -2022.852783203125, |
|
"logps/rejected": -1766.4072265625, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.06179197505116463, |
|
"rewards/margins": 0.01092799287289381, |
|
"rewards/rejected": 0.05086398124694824, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.656561209160248e-07, |
|
"logits/chosen": -0.22338895499706268, |
|
"logits/rejected": -0.2199423760175705, |
|
"logps/chosen": -1993.245361328125, |
|
"logps/rejected": -1980.831787109375, |
|
"loss": 0.0596, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.06557862460613251, |
|
"rewards/margins": 0.014192071743309498, |
|
"rewards/rejected": 0.051386553794145584, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.330409109340563e-07, |
|
"logits/chosen": -0.21173898875713348, |
|
"logits/rejected": -0.23590870201587677, |
|
"logps/chosen": -2017.1929931640625, |
|
"logps/rejected": -1657.1859130859375, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.055760689079761505, |
|
"rewards/margins": 0.013670523650944233, |
|
"rewards/rejected": 0.04209016636013985, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.010158509342682e-07, |
|
"logits/chosen": -0.23114773631095886, |
|
"logits/rejected": -0.23729057610034943, |
|
"logps/chosen": -2105.003173828125, |
|
"logps/rejected": -1778.635498046875, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.06233568117022514, |
|
"rewards/margins": 0.0166020505130291, |
|
"rewards/rejected": 0.04573363438248634, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.695916370265529e-07, |
|
"logits/chosen": -0.2337017059326172, |
|
"logits/rejected": -0.234249085187912, |
|
"logps/chosen": -2289.919189453125, |
|
"logps/rejected": -2049.482177734375, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.06871043145656586, |
|
"rewards/margins": 0.010876113548874855, |
|
"rewards/rejected": 0.05783431604504585, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -0.22742050886154175, |
|
"eval_logits/rejected": -0.23352740705013275, |
|
"eval_logps/chosen": -2154.34033203125, |
|
"eval_logps/rejected": -1829.7928466796875, |
|
"eval_loss": 0.04929284378886223, |
|
"eval_rewards/accuracies": 0.5674999952316284, |
|
"eval_rewards/chosen": 0.06206566095352173, |
|
"eval_rewards/margins": 0.015445946715772152, |
|
"eval_rewards/rejected": 0.0466197207570076, |
|
"eval_runtime": 510.6117, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.387787646430854e-07, |
|
"logits/chosen": -0.22113287448883057, |
|
"logits/rejected": -0.21311786770820618, |
|
"logps/chosen": -2256.49951171875, |
|
"logps/rejected": -1961.322509765625, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.06963467597961426, |
|
"rewards/margins": 0.015023264102637768, |
|
"rewards/rejected": 0.05461140722036362, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.085875250329401e-07, |
|
"logits/chosen": -0.24658381938934326, |
|
"logits/rejected": -0.2602604925632477, |
|
"logps/chosen": -1738.8958740234375, |
|
"logps/rejected": -1530.5950927734375, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0541699044406414, |
|
"rewards/margins": 0.007052128203213215, |
|
"rewards/rejected": 0.04711777716875076, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.79028001824894e-07, |
|
"logits/chosen": -0.23341718316078186, |
|
"logits/rejected": -0.2346893846988678, |
|
"logps/chosen": -2004.2154541015625, |
|
"logps/rejected": -1676.0980224609375, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.06295724958181381, |
|
"rewards/margins": 0.0180866289883852, |
|
"rewards/rejected": 0.04487061873078346, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.501100676595761e-07, |
|
"logits/chosen": -0.23593036830425262, |
|
"logits/rejected": -0.2430458515882492, |
|
"logps/chosen": -2228.714111328125, |
|
"logps/rejected": -1928.757080078125, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.07435286045074463, |
|
"rewards/margins": 0.01624133810400963, |
|
"rewards/rejected": 0.0581115186214447, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.218433808920884e-07, |
|
"logits/chosen": -0.22192791104316711, |
|
"logits/rejected": -0.23522309958934784, |
|
"logps/chosen": -2106.592041015625, |
|
"logps/rejected": -1706.7427978515625, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0745842233300209, |
|
"rewards/margins": 0.023264039307832718, |
|
"rewards/rejected": 0.05132018402218819, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.942373823661928e-07, |
|
"logits/chosen": -0.22204573452472687, |
|
"logits/rejected": -0.22397270798683167, |
|
"logps/chosen": -1921.546875, |
|
"logps/rejected": -1686.9296875, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.060963042080402374, |
|
"rewards/margins": 0.011381834745407104, |
|
"rewards/rejected": 0.04958119988441467, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6730129226114363e-07, |
|
"logits/chosen": -0.22597956657409668, |
|
"logits/rejected": -0.24938449263572693, |
|
"logps/chosen": -1889.419189453125, |
|
"logps/rejected": -1659.8782958984375, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.06332211196422577, |
|
"rewards/margins": 0.013857582584023476, |
|
"rewards/rejected": 0.04946453124284744, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4104410701222703e-07, |
|
"logits/chosen": -0.21659445762634277, |
|
"logits/rejected": -0.22937150299549103, |
|
"logps/chosen": -2046.7357177734375, |
|
"logps/rejected": -1769.0198974609375, |
|
"loss": 0.0451, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.06872855126857758, |
|
"rewards/margins": 0.014504766091704369, |
|
"rewards/rejected": 0.05422378331422806, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.154745963060197e-07, |
|
"logits/chosen": -0.21276862919330597, |
|
"logits/rejected": -0.2097276896238327, |
|
"logps/chosen": -2136.18505859375, |
|
"logps/rejected": -2049.29931640625, |
|
"loss": 0.0536, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.07395701855421066, |
|
"rewards/margins": 0.006907849106937647, |
|
"rewards/rejected": 0.06704917550086975, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9060130015138863e-07, |
|
"logits/chosen": -0.22508184611797333, |
|
"logits/rejected": -0.23881450295448303, |
|
"logps/chosen": -1998.213623046875, |
|
"logps/rejected": -1759.222412109375, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.06664810329675674, |
|
"rewards/margins": 0.011612234637141228, |
|
"rewards/rejected": 0.05503587797284126, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -0.22299662232398987, |
|
"eval_logits/rejected": -0.2297811657190323, |
|
"eval_logps/chosen": -2145.159423828125, |
|
"eval_logps/rejected": -1821.890869140625, |
|
"eval_loss": 0.049171119928359985, |
|
"eval_rewards/accuracies": 0.5705000162124634, |
|
"eval_rewards/chosen": 0.07124640792608261, |
|
"eval_rewards/margins": 0.01672479324042797, |
|
"eval_rewards/rejected": 0.05452162027359009, |
|
"eval_runtime": 510.5649, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.664325260271953e-07, |
|
"logits/chosen": -0.21083417534828186, |
|
"logits/rejected": -0.21836061775684357, |
|
"logps/chosen": -1921.0771484375, |
|
"logps/rejected": -1615.395263671875, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.06516659259796143, |
|
"rewards/margins": 0.010829558596014977, |
|
"rewards/rejected": 0.0543370358645916, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.429763461076677e-07, |
|
"logits/chosen": -0.24559417366981506, |
|
"logits/rejected": -0.23943760991096497, |
|
"logps/chosen": -1962.534423828125, |
|
"logps/rejected": -1884.5029296875, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.06632138788700104, |
|
"rewards/margins": 0.007880722172558308, |
|
"rewards/rejected": 0.0584406740963459, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.202405945663556e-07, |
|
"logits/chosen": -0.1999385952949524, |
|
"logits/rejected": -0.22029852867126465, |
|
"logps/chosen": -2002.773193359375, |
|
"logps/rejected": -1645.963623046875, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.06611990928649902, |
|
"rewards/margins": 0.018743688240647316, |
|
"rewards/rejected": 0.04737623408436775, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.982328649595856e-07, |
|
"logits/chosen": -0.23098058998584747, |
|
"logits/rejected": -0.2513691782951355, |
|
"logps/chosen": -2234.155517578125, |
|
"logps/rejected": -1985.674560546875, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.08107596635818481, |
|
"rewards/margins": 0.014828977175056934, |
|
"rewards/rejected": 0.06624698638916016, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7696050769026954e-07, |
|
"logits/chosen": -0.18273136019706726, |
|
"logits/rejected": -0.18867138028144836, |
|
"logps/chosen": -2058.58349609375, |
|
"logps/rejected": -1610.1470947265625, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.06953487545251846, |
|
"rewards/margins": 0.014529886655509472, |
|
"rewards/rejected": 0.055004991590976715, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.564306275529341e-07, |
|
"logits/chosen": -0.21245570480823517, |
|
"logits/rejected": -0.23336832225322723, |
|
"logps/chosen": -1910.864013671875, |
|
"logps/rejected": -1668.5755615234375, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.06216276437044144, |
|
"rewards/margins": 0.015415112487971783, |
|
"rewards/rejected": 0.04674764350056648, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3665008136077332e-07, |
|
"logits/chosen": -0.2325417697429657, |
|
"logits/rejected": -0.2111097276210785, |
|
"logps/chosen": -2226.126220703125, |
|
"logps/rejected": -2146.695556640625, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.07398100197315216, |
|
"rewards/margins": 0.01466774009168148, |
|
"rewards/rejected": 0.05931326001882553, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1762547565553293e-07, |
|
"logits/chosen": -0.23489132523536682, |
|
"logits/rejected": -0.26181578636169434, |
|
"logps/chosen": -2163.59130859375, |
|
"logps/rejected": -1799.791015625, |
|
"loss": 0.045, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0649464800953865, |
|
"rewards/margins": 0.013443303294479847, |
|
"rewards/rejected": 0.051503174006938934, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.993631645009747e-07, |
|
"logits/chosen": -0.2324393093585968, |
|
"logits/rejected": -0.2406429946422577, |
|
"logps/chosen": -2243.14990234375, |
|
"logps/rejected": -1793.9261474609375, |
|
"loss": 0.0459, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.07493821531534195, |
|
"rewards/margins": 0.016798479482531548, |
|
"rewards/rejected": 0.05813973397016525, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.818692473606748e-07, |
|
"logits/chosen": -0.2428218573331833, |
|
"logits/rejected": -0.22781512141227722, |
|
"logps/chosen": -1967.1754150390625, |
|
"logps/rejected": -1719.901123046875, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.05938352271914482, |
|
"rewards/margins": 0.014253886416554451, |
|
"rewards/rejected": 0.04512963443994522, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -0.21960072219371796, |
|
"eval_logits/rejected": -0.22588692605495453, |
|
"eval_logps/chosen": -2149.13818359375, |
|
"eval_logps/rejected": -1825.278564453125, |
|
"eval_loss": 0.04918248951435089, |
|
"eval_rewards/accuracies": 0.5674999952316284, |
|
"eval_rewards/chosen": 0.06726768612861633, |
|
"eval_rewards/margins": 0.016133680939674377, |
|
"eval_rewards/rejected": 0.05113400146365166, |
|
"eval_runtime": 510.6325, |
|
"eval_samples_per_second": 3.917, |
|
"eval_steps_per_second": 0.979, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6514956706084885e-07, |
|
"logits/chosen": -0.1846579611301422, |
|
"logits/rejected": -0.21179255843162537, |
|
"logps/chosen": -2128.083740234375, |
|
"logps/rejected": -1733.182861328125, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.06305380910634995, |
|
"rewards/margins": 0.013125176541507244, |
|
"rewards/rejected": 0.049928631633520126, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4920970783889737e-07, |
|
"logits/chosen": -0.19069206714630127, |
|
"logits/rejected": -0.2094193696975708, |
|
"logps/chosen": -2153.69580078125, |
|
"logps/rejected": -1683.8616943359375, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.06470336019992828, |
|
"rewards/margins": 0.014464011415839195, |
|
"rewards/rejected": 0.05023934692144394, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.340549934783164e-07, |
|
"logits/chosen": -0.20567326247692108, |
|
"logits/rejected": -0.22026868164539337, |
|
"logps/chosen": -2285.531005859375, |
|
"logps/rejected": -1900.779052734375, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.07268913835287094, |
|
"rewards/margins": 0.022747965529561043, |
|
"rewards/rejected": 0.04994116351008415, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.196904855305961e-07, |
|
"logits/chosen": -0.21483811736106873, |
|
"logits/rejected": -0.23524871468544006, |
|
"logps/chosen": -2039.6002197265625, |
|
"logps/rejected": -1728.5648193359375, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.06144179031252861, |
|
"rewards/margins": 0.012769539840519428, |
|
"rewards/rejected": 0.04867224767804146, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0612098162470302e-07, |
|
"logits/chosen": -0.209224671125412, |
|
"logits/rejected": -0.22041518986225128, |
|
"logps/chosen": -1966.912353515625, |
|
"logps/rejected": -1770.239990234375, |
|
"loss": 0.0473, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0644414871931076, |
|
"rewards/margins": 0.014524770900607109, |
|
"rewards/rejected": 0.049916718155145645, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.335101386471285e-08, |
|
"logits/chosen": -0.1998235136270523, |
|
"logits/rejected": -0.205234095454216, |
|
"logps/chosen": -2081.581787109375, |
|
"logps/rejected": -1733.4261474609375, |
|
"loss": 0.0385, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.07012965530157089, |
|
"rewards/margins": 0.018411414697766304, |
|
"rewards/rejected": 0.05171824246644974, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.138484731612273e-08, |
|
"logits/chosen": -0.21281655132770538, |
|
"logits/rejected": -0.2385600358247757, |
|
"logps/chosen": -2208.79736328125, |
|
"logps/rejected": -1743.3955078125, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.06378593295812607, |
|
"rewards/margins": 0.012727012857794762, |
|
"rewards/rejected": 0.051058925688266754, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.022647858135501e-08, |
|
"logits/chosen": -0.22884276509284973, |
|
"logits/rejected": -0.2317463457584381, |
|
"logps/chosen": -2076.50048828125, |
|
"logps/rejected": -1827.575439453125, |
|
"loss": 0.0497, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.06807791441679001, |
|
"rewards/margins": 0.011940672062337399, |
|
"rewards/rejected": 0.05613725259900093, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.987963446492384e-08, |
|
"logits/chosen": -0.20104601979255676, |
|
"logits/rejected": -0.19782570004463196, |
|
"logps/chosen": -1880.1734619140625, |
|
"logps/rejected": -1685.8695068359375, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.05542607977986336, |
|
"rewards/margins": 0.011624794453382492, |
|
"rewards/rejected": 0.04380128160119057, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.034777072871394e-08, |
|
"logits/chosen": -0.1951800137758255, |
|
"logits/rejected": -0.21924810111522675, |
|
"logps/chosen": -1900.6998291015625, |
|
"logps/rejected": -1563.48876953125, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.05453474447131157, |
|
"rewards/margins": 0.008965181186795235, |
|
"rewards/rejected": 0.045569561421871185, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -0.21762163937091827, |
|
"eval_logits/rejected": -0.2241181582212448, |
|
"eval_logps/chosen": -2148.109619140625, |
|
"eval_logps/rejected": -1824.3348388671875, |
|
"eval_loss": 0.04916713759303093, |
|
"eval_rewards/accuracies": 0.5690000057220459, |
|
"eval_rewards/chosen": 0.0682961568236351, |
|
"eval_rewards/margins": 0.016218481585383415, |
|
"eval_rewards/rejected": 0.05207766965031624, |
|
"eval_runtime": 510.6606, |
|
"eval_samples_per_second": 3.916, |
|
"eval_steps_per_second": 0.979, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.163407093778243e-08, |
|
"logits/chosen": -0.1938626617193222, |
|
"logits/rejected": -0.21109886467456818, |
|
"logps/chosen": -1975.074951171875, |
|
"logps/rejected": -1626.160888671875, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.06530580669641495, |
|
"rewards/margins": 0.017866965383291245, |
|
"rewards/rejected": 0.0474388413131237, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.37414453970758e-08, |
|
"logits/chosen": -0.20253758132457733, |
|
"logits/rejected": -0.21794748306274414, |
|
"logps/chosen": -2259.384765625, |
|
"logps/rejected": -2048.346435546875, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.07413917034864426, |
|
"rewards/margins": 0.02017979882657528, |
|
"rewards/rejected": 0.053959377110004425, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.6672530179410183e-08, |
|
"logits/chosen": -0.19098524749279022, |
|
"logits/rejected": -0.19925786554813385, |
|
"logps/chosen": -2076.921630859375, |
|
"logps/rejected": -1773.484619140625, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.06547501683235168, |
|
"rewards/margins": 0.016652025282382965, |
|
"rewards/rejected": 0.04882299154996872, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.04296862450451e-08, |
|
"logits/chosen": -0.20272760093212128, |
|
"logits/rejected": -0.23647110164165497, |
|
"logps/chosen": -2243.960205078125, |
|
"logps/rejected": -2028.1578369140625, |
|
"loss": 0.0483, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.07614084333181381, |
|
"rewards/margins": 0.01566244289278984, |
|
"rewards/rejected": 0.06047840043902397, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.501499865314171e-08, |
|
"logits/chosen": -0.22630052268505096, |
|
"logits/rejected": -0.20891804993152618, |
|
"logps/chosen": -1954.311279296875, |
|
"logps/rejected": -1862.181640625, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.06258489936590195, |
|
"rewards/margins": 0.01086291205137968, |
|
"rewards/rejected": 0.05172199010848999, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0430275865371265e-08, |
|
"logits/chosen": -0.21302291750907898, |
|
"logits/rejected": -0.22670722007751465, |
|
"logps/chosen": -1873.691650390625, |
|
"logps/rejected": -1755.8060302734375, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.05816579982638359, |
|
"rewards/margins": 0.013311423361301422, |
|
"rewards/rejected": 0.04485438019037247, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.677049141901315e-09, |
|
"logits/chosen": -0.1987680345773697, |
|
"logits/rejected": -0.2282913625240326, |
|
"logps/chosen": -1935.877197265625, |
|
"logps/rejected": -1745.5570068359375, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.06320817768573761, |
|
"rewards/margins": 0.013358126394450665, |
|
"rewards/rejected": 0.049850039184093475, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.756572029968708e-09, |
|
"logits/chosen": -0.21312955021858215, |
|
"logits/rejected": -0.22611579298973083, |
|
"logps/chosen": -2255.653564453125, |
|
"logps/rejected": -1939.3330078125, |
|
"loss": 0.0459, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.07814554870128632, |
|
"rewards/margins": 0.018966957926750183, |
|
"rewards/rejected": 0.05917859077453613, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6698199452053199e-09, |
|
"logits/chosen": -0.2335490882396698, |
|
"logits/rejected": -0.22842903435230255, |
|
"logps/chosen": -2171.2451171875, |
|
"logps/rejected": -1897.8079833984375, |
|
"loss": 0.0534, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.07153952866792679, |
|
"rewards/margins": 0.017695123329758644, |
|
"rewards/rejected": 0.053844403475522995, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.1748984585560094e-10, |
|
"logits/chosen": -0.18400521576404572, |
|
"logits/rejected": -0.20925450325012207, |
|
"logps/chosen": -2247.09765625, |
|
"logps/rejected": -1860.405517578125, |
|
"loss": 0.05, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0739186555147171, |
|
"rewards/margins": 0.018731053918600082, |
|
"rewards/rejected": 0.05518760159611702, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -0.21872195601463318, |
|
"eval_logits/rejected": -0.22539223730564117, |
|
"eval_logps/chosen": -2148.457763671875, |
|
"eval_logps/rejected": -1824.6458740234375, |
|
"eval_loss": 0.04917627200484276, |
|
"eval_rewards/accuracies": 0.5669999718666077, |
|
"eval_rewards/chosen": 0.06794830411672592, |
|
"eval_rewards/margins": 0.016181621700525284, |
|
"eval_rewards/rejected": 0.05176668241620064, |
|
"eval_runtime": 511.1542, |
|
"eval_samples_per_second": 3.913, |
|
"eval_steps_per_second": 0.978, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.21931472420692444, |
|
"logits/rejected": -0.21789617836475372, |
|
"logps/chosen": -2259.647216796875, |
|
"logps/rejected": -1940.0595703125, |
|
"loss": 0.0525, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.07358353585004807, |
|
"rewards/margins": 0.015202896669507027, |
|
"rewards/rejected": 0.05838064104318619, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05238237046290443, |
|
"train_runtime": 26355.2814, |
|
"train_samples_per_second": 1.16, |
|
"train_steps_per_second": 0.072 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|