diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,5689 +1,1908 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 2.9984268484530676, + "epoch": 0.9994756161510225, "eval_steps": 100, - "global_step": 2859, + "global_step": 953, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01048767697954903, - "grad_norm": 11.303338968797107, - "learning_rate": 2.0000000000000003e-06, - "log_odds_chosen": 0.16597549617290497, - "log_odds_ratio": -0.6960083246231079, - "logits/chosen": -2.5440375804901123, - "logits/rejected": -2.532742977142334, - "logps/chosen": -0.9999498128890991, - "logps/rejected": -1.0999202728271484, - "loss": 2.7435, - "nll_loss": 2.655998706817627, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.049997489899396896, - "rewards/margins": 0.004998520482331514, - "rewards/rejected": -0.054996006190776825, + "grad_norm": 281.5632535171625, + "learning_rate": 7.000000000000001e-07, + "log_odds_chosen": 0.14837229251861572, + "log_odds_ratio": -0.7063122987747192, + "logits/chosen": -2.4233744144439697, + "logits/rejected": -2.3922557830810547, + "logps/chosen": -1.0665283203125, + "logps/rejected": -1.164435625076294, + "loss": 3.7384, + "nll_loss": 3.6487019062042236, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.05332641676068306, + "rewards/margins": 0.004895367659628391, + "rewards/rejected": -0.058221787214279175, "step": 10 }, { "epoch": 0.02097535395909806, - "grad_norm": 3.296785739531489, - "learning_rate": 4.000000000000001e-06, - "log_odds_chosen": 0.19497092068195343, - "log_odds_ratio": -0.6663684844970703, - "logits/chosen": -3.153244733810425, - "logits/rejected": -3.176297903060913, - "logps/chosen": -0.7618023753166199, - "logps/rejected": -0.8721799850463867, - "loss": 0.5628, - "nll_loss": 0.5223663449287415, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03809012100100517, - "rewards/margins": 0.005518879741430283, - "rewards/rejected": -0.043609000742435455, + "grad_norm": 3.6095114671977337, + "learning_rate": 1.4000000000000001e-06, + "log_odds_chosen": 0.18771903216838837, + "log_odds_ratio": -0.6616674661636353, + "logits/chosen": -2.669743061065674, + "logits/rejected": -2.6637511253356934, + "logps/chosen": -0.8115625381469727, + "logps/rejected": -0.9194537401199341, + "loss": 0.598, + "nll_loss": 0.5553613901138306, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.04057813063263893, + "rewards/margins": 0.005394552834331989, + "rewards/rejected": -0.045972686260938644, "step": 20 }, { "epoch": 0.03146303093864709, - "grad_norm": 2.5096714885559264, - "learning_rate": 6e-06, - "log_odds_chosen": 0.23512229323387146, - "log_odds_ratio": -0.6553729772567749, - "logits/chosen": -2.9705119132995605, - "logits/rejected": -2.944556713104248, - "logps/chosen": -0.8099643588066101, - "logps/rejected": -0.9404464960098267, - "loss": 0.5331, - "nll_loss": 0.4915856420993805, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.040498219430446625, - "rewards/margins": 0.0065241060219705105, - "rewards/rejected": -0.04702232405543327, + "grad_norm": 2.6104338509446743, + "learning_rate": 2.1e-06, + "log_odds_chosen": 0.24361269176006317, + "log_odds_ratio": -0.6484603881835938, + "logits/chosen": -2.8152480125427246, + "logits/rejected": -2.770486831665039, + "logps/chosen": -0.7975724339485168, + "logps/rejected": -0.9327106475830078, + "loss": 0.539, + "nll_loss": 0.4975182116031647, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03987862169742584, + "rewards/margins": 0.006756913848221302, + "rewards/rejected": -0.04663553088903427, "step": 30 }, { "epoch": 0.04195070791819612, - "grad_norm": 2.5670929503530138, - "learning_rate": 8.000000000000001e-06, - "log_odds_chosen": 0.1703537404537201, - "log_odds_ratio": -0.6904168128967285, - "logits/chosen": -2.8517043590545654, - "logits/rejected": -2.83884334564209, - "logps/chosen": -0.805575966835022, - "logps/rejected": -0.9237464666366577, - "loss": 0.5194, - "nll_loss": 0.4799742102622986, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.04027879983186722, - "rewards/margins": 0.005908523220568895, - "rewards/rejected": -0.046187322586774826, + "grad_norm": 2.6082713320666966, + "learning_rate": 2.8000000000000003e-06, + "log_odds_chosen": 0.18453697860240936, + "log_odds_ratio": -0.6863341331481934, + "logits/chosen": -2.7431702613830566, + "logits/rejected": -2.721076488494873, + "logps/chosen": -0.7775384783744812, + "logps/rejected": -0.8990561366081238, + "loss": 0.5182, + "nll_loss": 0.4802665710449219, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.03887692838907242, + "rewards/margins": 0.006075879093259573, + "rewards/rejected": -0.04495280981063843, "step": 40 }, { "epoch": 0.05243838489774515, - "grad_norm": 2.8257696541784587, - "learning_rate": 1e-05, - "log_odds_chosen": 0.28843408823013306, - "log_odds_ratio": -0.6763556599617004, - "logits/chosen": -2.7286221981048584, - "logits/rejected": -2.72869610786438, - "logps/chosen": -0.787534236907959, - "logps/rejected": -0.968492865562439, - "loss": 0.5419, - "nll_loss": 0.48419374227523804, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.03937670961022377, - "rewards/margins": 0.009047931991517544, - "rewards/rejected": -0.04842463880777359, + "grad_norm": 2.8319159240383356, + "learning_rate": 3.5e-06, + "log_odds_chosen": 0.2895735204219818, + "log_odds_ratio": -0.6829751133918762, + "logits/chosen": -2.6645712852478027, + "logits/rejected": -2.6532058715820312, + "logps/chosen": -0.7420316934585571, + "logps/rejected": -0.92218017578125, + "loss": 0.5346, + "nll_loss": 0.4737791419029236, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03710158169269562, + "rewards/margins": 0.009007426910102367, + "rewards/rejected": -0.04610900953412056, "step": 50 }, { "epoch": 0.06292606187729417, - "grad_norm": 2.7270372711002624, - "learning_rate": 1.2e-05, - "log_odds_chosen": 0.2020198553800583, - "log_odds_ratio": -0.6800572872161865, - "logits/chosen": -2.896289110183716, - "logits/rejected": -2.8839545249938965, - "logps/chosen": -0.8010624051094055, - "logps/rejected": -0.9179455637931824, - "loss": 0.5456, - "nll_loss": 0.5158990621566772, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.040053121745586395, - "rewards/margins": 0.005844158586114645, - "rewards/rejected": -0.04589728266000748, + "grad_norm": 2.702391106634465, + "learning_rate": 4.2e-06, + "log_odds_chosen": 0.23618292808532715, + "log_odds_ratio": -0.6679760217666626, + "logits/chosen": -2.7234179973602295, + "logits/rejected": -2.701585292816162, + "logps/chosen": -0.7408851385116577, + "logps/rejected": -0.8674576878547668, + "loss": 0.5296, + "nll_loss": 0.5001371502876282, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.03704426437616348, + "rewards/margins": 0.0063286214135587215, + "rewards/rejected": -0.04337288811802864, "step": 60 }, { "epoch": 0.07341373885684321, - "grad_norm": 2.7197204143491605, - "learning_rate": 1.4e-05, - "log_odds_chosen": 0.1937415450811386, - "log_odds_ratio": -0.6942794919013977, - "logits/chosen": -2.8848633766174316, - "logits/rejected": -2.905164957046509, - "logps/chosen": -0.8219146728515625, - "logps/rejected": -0.9291160702705383, - "loss": 0.5412, - "nll_loss": 0.5311218500137329, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.041095733642578125, - "rewards/margins": 0.005360070150345564, - "rewards/rejected": -0.046455807983875275, + "grad_norm": 2.7579557747488237, + "learning_rate": 4.9e-06, + "log_odds_chosen": 0.1982727348804474, + "log_odds_ratio": -0.7039018869400024, + "logits/chosen": -2.716829776763916, + "logits/rejected": -2.7165746688842773, + "logps/chosen": -0.7602167129516602, + "logps/rejected": -0.8683260679244995, + "loss": 0.5179, + "nll_loss": 0.5095189213752747, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.03801083564758301, + "rewards/margins": 0.005405469331890345, + "rewards/rejected": -0.043416302651166916, "step": 70 }, { "epoch": 0.08390141583639224, - "grad_norm": 1049.2102246099553, - "learning_rate": 1.6000000000000003e-05, - "log_odds_chosen": 0.1753607988357544, - "log_odds_ratio": -0.6886225938796997, - "logits/chosen": -2.6637063026428223, - "logits/rejected": -2.637396812438965, - "logps/chosen": -0.8933579325675964, - "logps/rejected": -1.020629644393921, - "loss": 1.0694, - "nll_loss": 0.9787748456001282, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.04466789960861206, - "rewards/margins": 0.006363583263009787, - "rewards/rejected": -0.051031481474637985, + "grad_norm": 2.7333788754363826, + "learning_rate": 5.600000000000001e-06, + "log_odds_chosen": 0.19610878825187683, + "log_odds_ratio": -0.6825613379478455, + "logits/chosen": -2.6934926509857178, + "logits/rejected": -2.6538023948669434, + "logps/chosen": -0.8004279136657715, + "logps/rejected": -0.9359849095344543, + "loss": 0.5198, + "nll_loss": 0.44797396659851074, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.040021397173404694, + "rewards/margins": 0.006777846720069647, + "rewards/rejected": -0.04679924249649048, "step": 80 }, { "epoch": 0.09438909281594127, - "grad_norm": 4.011701524085754, - "learning_rate": 1.8e-05, - "log_odds_chosen": 0.2628815174102783, - "log_odds_ratio": -0.6731477975845337, - "logits/chosen": -3.106489658355713, - "logits/rejected": -3.0954391956329346, - "logps/chosen": -0.9435924291610718, - "logps/rejected": -1.1041589975357056, - "loss": 0.5766, - "nll_loss": 0.5112682580947876, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04717962443828583, - "rewards/margins": 0.008028322830796242, - "rewards/rejected": -0.05520794540643692, + "grad_norm": 2.643892428655997, + "learning_rate": 6.3e-06, + "log_odds_chosen": 0.32694971561431885, + "log_odds_ratio": -0.6449785828590393, + "logits/chosen": -2.6064088344573975, + "logits/rejected": -2.600590229034424, + "logps/chosen": -0.7779799699783325, + "logps/rejected": -0.970491886138916, + "loss": 0.5108, + "nll_loss": 0.4519652724266052, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.03889899700880051, + "rewards/margins": 0.009625596925616264, + "rewards/rejected": -0.04852459207177162, "step": 90 }, { "epoch": 0.1048767697954903, - "grad_norm": 5.340561330006851, - "learning_rate": 2e-05, - "log_odds_chosen": 0.17503713071346283, - "log_odds_ratio": -0.6751121282577515, - "logits/chosen": -3.3266518115997314, - "logits/rejected": -3.3420982360839844, - "logps/chosen": -0.8886896371841431, - "logps/rejected": -1.0002682209014893, - "loss": 0.5668, - "nll_loss": 0.5238600969314575, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.044434480369091034, - "rewards/margins": 0.005578924436122179, - "rewards/rejected": -0.050013404339551926, + "grad_norm": 2.7386435335682178, + "learning_rate": 7e-06, + "log_odds_chosen": 0.24293240904808044, + "log_odds_ratio": -0.65534907579422, + "logits/chosen": -2.800649881362915, + "logits/rejected": -2.783020257949829, + "logps/chosen": -0.7912999391555786, + "logps/rejected": -0.931311309337616, + "loss": 0.5226, + "nll_loss": 0.4863203167915344, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.03956499695777893, + "rewards/margins": 0.007000570185482502, + "rewards/rejected": -0.04656556248664856, "step": 100 }, { "epoch": 0.1048767697954903, - "eval_log_odds_chosen": 0.21844430267810822, - "eval_log_odds_ratio": -0.6529861688613892, - "eval_logits/chosen": -3.3082144260406494, - "eval_logits/rejected": -3.3147807121276855, - "eval_logps/chosen": -0.9112777709960938, - "eval_logps/rejected": -1.0580321550369263, - "eval_loss": 0.5842872858047485, - "eval_nll_loss": 0.5515953898429871, - "eval_rewards/accuracies": 0.6150793433189392, - "eval_rewards/chosen": -0.04556388780474663, - "eval_rewards/margins": 0.007337724789977074, - "eval_rewards/rejected": -0.05290161445736885, - "eval_runtime": 138.2645, - "eval_samples_per_second": 14.422, - "eval_steps_per_second": 0.456, + "eval_log_odds_chosen": 0.2873421609401703, + "eval_log_odds_ratio": -0.632556140422821, + "eval_logits/chosen": -2.7859702110290527, + "eval_logits/rejected": -2.758275270462036, + "eval_logps/chosen": -0.7728292942047119, + "eval_logps/rejected": -0.9448140263557434, + "eval_loss": 0.5279971957206726, + "eval_nll_loss": 0.49532046914100647, + "eval_rewards/accuracies": 0.6329365372657776, + "eval_rewards/chosen": -0.03864146023988724, + "eval_rewards/margins": 0.008599241264164448, + "eval_rewards/rejected": -0.04724070429801941, + "eval_runtime": 137.6903, + "eval_samples_per_second": 14.482, + "eval_steps_per_second": 0.458, "step": 100 }, { "epoch": 0.11536444677503933, - "grad_norm": 2.8100337089038514, - "learning_rate": 1.9069251784911845e-05, - "log_odds_chosen": 0.2544933259487152, - "log_odds_ratio": -0.643945038318634, - "logits/chosen": -3.2667174339294434, - "logits/rejected": -3.310918092727661, - "logps/chosen": -0.8447545170783997, - "logps/rejected": -1.009132981300354, - "loss": 0.5651, - "nll_loss": 0.5105677843093872, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.042237721383571625, - "rewards/margins": 0.008218927308917046, - "rewards/rejected": -0.05045665428042412, + "grad_norm": 3.1992530570673416, + "learning_rate": 6.674238124719146e-06, + "log_odds_chosen": 0.34574735164642334, + "log_odds_ratio": -0.612960934638977, + "logits/chosen": -2.770359516143799, + "logits/rejected": -2.785818099975586, + "logps/chosen": -0.7360346913337708, + "logps/rejected": -0.9339498281478882, + "loss": 0.516, + "nll_loss": 0.46663737297058105, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.03680173680186272, + "rewards/margins": 0.009895754046738148, + "rewards/rejected": -0.04669748991727829, "step": 110 }, { "epoch": 0.12585212375458835, - "grad_norm": 2.2193460343172986, - "learning_rate": 1.825741858350554e-05, - "log_odds_chosen": 0.24397364258766174, - "log_odds_ratio": -0.6682508587837219, - "logits/chosen": -3.193361282348633, - "logits/rejected": -3.243128538131714, - "logps/chosen": -0.8714381456375122, - "logps/rejected": -1.0333614349365234, - "loss": 0.6091, - "nll_loss": 0.5700744390487671, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.04357190802693367, - "rewards/margins": 0.008096165955066681, - "rewards/rejected": -0.051668066531419754, + "grad_norm": 2.389888529611206, + "learning_rate": 6.390096504226938e-06, + "log_odds_chosen": 0.3332720696926117, + "log_odds_ratio": -0.629552960395813, + "logits/chosen": -2.765531063079834, + "logits/rejected": -2.7438697814941406, + "logps/chosen": -0.7498644590377808, + "logps/rejected": -0.9586297273635864, + "loss": 0.5424, + "nll_loss": 0.5031455159187317, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.03749322146177292, + "rewards/margins": 0.010438265278935432, + "rewards/rejected": -0.0479314923286438, "step": 120 }, { "epoch": 0.1363398007341374, - "grad_norm": 2.3414921674264555, - "learning_rate": 1.7541160386140587e-05, - "log_odds_chosen": 0.2272050678730011, - "log_odds_ratio": -0.6708214282989502, - "logits/chosen": -3.1920104026794434, - "logits/rejected": -3.211714267730713, - "logps/chosen": -0.8986352682113647, - "logps/rejected": -1.0474598407745361, - "loss": 0.5886, - "nll_loss": 0.552306056022644, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.044931765645742416, - "rewards/margins": 0.007441227789968252, - "rewards/rejected": -0.052372999489307404, + "grad_norm": 2.352563456984363, + "learning_rate": 6.139406135149204e-06, + "log_odds_chosen": 0.22595734894275665, + "log_odds_ratio": -0.6784238219261169, + "logits/chosen": -2.7593860626220703, + "logits/rejected": -2.743048667907715, + "logps/chosen": -0.7811408042907715, + "logps/rejected": -0.9164878726005554, + "loss": 0.5343, + "nll_loss": 0.49365147948265076, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.039057038724422455, + "rewards/margins": 0.006767353508621454, + "rewards/rejected": -0.04582439363002777, "step": 130 }, { "epoch": 0.14682747771368643, - "grad_norm": 2.3255085925590597, - "learning_rate": 1.6903085094570334e-05, - "log_odds_chosen": 0.22232067584991455, - "log_odds_ratio": -0.6680520176887512, - "logits/chosen": -3.1715519428253174, - "logits/rejected": -3.198253631591797, - "logps/chosen": -0.9551104307174683, - "logps/rejected": -1.1022988557815552, - "loss": 0.5878, - "nll_loss": 0.5523446798324585, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.04775552451610565, - "rewards/margins": 0.007359415292739868, - "rewards/rejected": -0.05511493608355522, + "grad_norm": 2.436711404156596, + "learning_rate": 5.916079783099616e-06, + "log_odds_chosen": 0.2472628802061081, + "log_odds_ratio": -0.6597720384597778, + "logits/chosen": -2.6898269653320312, + "logits/rejected": -2.669379711151123, + "logps/chosen": -0.8302755355834961, + "logps/rejected": -0.9775524139404297, + "loss": 0.5262, + "nll_loss": 0.49079251289367676, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.041513778269290924, + "rewards/margins": 0.007363851182162762, + "rewards/rejected": -0.04887763410806656, "step": 140 }, { "epoch": 0.15731515469323545, - "grad_norm": 2.6729814886854766, - "learning_rate": 1.6329931618554523e-05, - "log_odds_chosen": 0.17247287929058075, - "log_odds_ratio": -0.7340894341468811, - "logits/chosen": -3.102067470550537, - "logits/rejected": -3.1263070106506348, - "logps/chosen": -0.9946192502975464, - "logps/rejected": -1.1088117361068726, - "loss": 0.5489, - "nll_loss": 0.5492355823516846, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04973096773028374, - "rewards/margins": 0.005709617864340544, - "rewards/rejected": -0.05544058233499527, + "grad_norm": 2.622232308829729, + "learning_rate": 5.715476066494083e-06, + "log_odds_chosen": 0.23396515846252441, + "log_odds_ratio": -0.7018890976905823, + "logits/chosen": -2.6906025409698486, + "logits/rejected": -2.685272455215454, + "logps/chosen": -0.8395276069641113, + "logps/rejected": -0.9926843643188477, + "loss": 0.4873, + "nll_loss": 0.4751507639884949, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.041976384818553925, + "rewards/margins": 0.007657832466065884, + "rewards/rejected": -0.04963421821594238, "step": 150 }, { "epoch": 0.16780283167278448, - "grad_norm": 2.603021066142599, - "learning_rate": 1.5811388300841898e-05, - "log_odds_chosen": 0.2041763812303543, - "log_odds_ratio": -0.6666288375854492, - "logits/chosen": -3.0764000415802, - "logits/rejected": -3.1064279079437256, - "logps/chosen": -0.9137493968009949, - "logps/rejected": -1.0383034944534302, - "loss": 0.6063, - "nll_loss": 0.5569471120834351, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.045687466859817505, - "rewards/margins": 0.006227707955986261, - "rewards/rejected": -0.05191517621278763, + "grad_norm": 2.5349291816098587, + "learning_rate": 5.533985905294663e-06, + "log_odds_chosen": 0.23518291115760803, + "log_odds_ratio": -0.64958655834198, + "logits/chosen": -2.7026143074035645, + "logits/rejected": -2.690053701400757, + "logps/chosen": -0.7785183191299438, + "logps/rejected": -0.9093867540359497, + "loss": 0.5435, + "nll_loss": 0.4887324869632721, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.038925912231206894, + "rewards/margins": 0.006543423049151897, + "rewards/rejected": -0.045469339936971664, "step": 160 }, { "epoch": 0.1782905086523335, - "grad_norm": 2.4919552056925416, - "learning_rate": 1.533929977694741e-05, - "log_odds_chosen": 0.25588172674179077, - "log_odds_ratio": -0.6607967019081116, - "logits/chosen": -3.1293396949768066, - "logits/rejected": -3.1606574058532715, - "logps/chosen": -0.8986794352531433, - "logps/rejected": -1.0667051076889038, - "loss": 0.5845, - "nll_loss": 0.5496193766593933, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.044933974742889404, - "rewards/margins": 0.008401280269026756, - "rewards/rejected": -0.05333525687456131, + "grad_norm": 2.421225073724309, + "learning_rate": 5.368754921931593e-06, + "log_odds_chosen": 0.3210265636444092, + "log_odds_ratio": -0.6400843262672424, + "logits/chosen": -2.7624573707580566, + "logits/rejected": -2.7493152618408203, + "logps/chosen": -0.7663661241531372, + "logps/rejected": -0.9589449763298035, + "loss": 0.5263, + "nll_loss": 0.4972688555717468, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03831830993294716, + "rewards/margins": 0.009628941304981709, + "rewards/rejected": -0.047947246581315994, "step": 170 }, { "epoch": 0.18877818563188253, - "grad_norm": 2.4600198980545915, - "learning_rate": 1.49071198499986e-05, - "log_odds_chosen": 0.27393144369125366, - "log_odds_ratio": -0.6479635238647461, - "logits/chosen": -3.080091714859009, - "logits/rejected": -3.103672504425049, - "logps/chosen": -0.9190357327461243, - "logps/rejected": -1.0871737003326416, - "loss": 0.5676, - "nll_loss": 0.550677478313446, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.045951783657073975, - "rewards/margins": 0.00840689055621624, - "rewards/rejected": -0.05435867980122566, + "grad_norm": 2.413880479048562, + "learning_rate": 5.217491947499509e-06, + "log_odds_chosen": 0.29789280891418457, + "log_odds_ratio": -0.6485607028007507, + "logits/chosen": -2.750358819961548, + "logits/rejected": -2.7341530323028564, + "logps/chosen": -0.8058354258537292, + "logps/rejected": -0.9941579699516296, + "loss": 0.5125, + "nll_loss": 0.4958602488040924, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04029177129268646, + "rewards/margins": 0.009416128508746624, + "rewards/rejected": -0.04970790073275566, "step": 180 }, { "epoch": 0.19926586261143156, - "grad_norm": 5.689090620434962, - "learning_rate": 1.4509525002200235e-05, - "log_odds_chosen": 0.23676976561546326, - "log_odds_ratio": -0.6501709222793579, - "logits/chosen": -3.0815584659576416, - "logits/rejected": -3.1054322719573975, - "logps/chosen": -0.9278916120529175, - "logps/rejected": -1.0751855373382568, - "loss": 0.5906, - "nll_loss": 0.6120038628578186, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.046394579112529755, - "rewards/margins": 0.007364692632108927, - "rewards/rejected": -0.05375927686691284, + "grad_norm": 2.6903547627560362, + "learning_rate": 5.078333750770082e-06, + "log_odds_chosen": 0.3165002167224884, + "log_odds_ratio": -0.6190484762191772, + "logits/chosen": -2.766507387161255, + "logits/rejected": -2.747089385986328, + "logps/chosen": -0.8013149499893188, + "logps/rejected": -0.9806981086730957, + "loss": 0.5316, + "nll_loss": 0.5532199740409851, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.04006574675440788, + "rewards/margins": 0.008969161659479141, + "rewards/rejected": -0.04903491213917732, "step": 190 }, { "epoch": 0.2097535395909806, - "grad_norm": 2.2848535898780375, - "learning_rate": 1.4142135623730951e-05, - "log_odds_chosen": 0.2697228789329529, - "log_odds_ratio": -0.6704415082931519, - "logits/chosen": -2.99995756149292, - "logits/rejected": -3.038682460784912, - "logps/chosen": -0.9138332605361938, - "logps/rejected": -1.1080011129379272, - "loss": 0.5676, - "nll_loss": 0.5736643075942993, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04569166153669357, - "rewards/margins": 0.009708395227789879, - "rewards/rejected": -0.0554000549018383, + "grad_norm": 2.1991852076726754, + "learning_rate": 4.949747468305832e-06, + "log_odds_chosen": 0.33575549721717834, + "log_odds_ratio": -0.651211678981781, + "logits/chosen": -2.7371087074279785, + "logits/rejected": -2.7220566272735596, + "logps/chosen": -0.7840306162834167, + "logps/rejected": -1.0072247982025146, + "loss": 0.5074, + "nll_loss": 0.5064893960952759, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.039201535284519196, + "rewards/margins": 0.011159711517393589, + "rewards/rejected": -0.05036124587059021, "step": 200 }, { "epoch": 0.2097535395909806, - "eval_log_odds_chosen": 0.2850116789340973, - "eval_log_odds_ratio": -0.6474155783653259, - "eval_logits/chosen": -2.9992330074310303, - "eval_logits/rejected": -3.0026443004608154, - "eval_logps/chosen": -0.8811094164848328, - "eval_logps/rejected": -1.0644237995147705, - "eval_loss": 0.5726434588432312, - "eval_nll_loss": 0.5359312295913696, - "eval_rewards/accuracies": 0.625, - "eval_rewards/chosen": -0.04405546560883522, - "eval_rewards/margins": 0.00916572567075491, - "eval_rewards/rejected": -0.053221192210912704, - "eval_runtime": 137.9025, - "eval_samples_per_second": 14.459, - "eval_steps_per_second": 0.457, + "eval_log_odds_chosen": 0.31895044445991516, + "eval_log_odds_ratio": -0.6356511116027832, + "eval_logits/chosen": -2.700209140777588, + "eval_logits/rejected": -2.673612594604492, + "eval_logps/chosen": -0.7611523866653442, + "eval_logps/rejected": -0.9565821290016174, + "eval_loss": 0.5133659839630127, + "eval_nll_loss": 0.47739487886428833, + "eval_rewards/accuracies": 0.6408730149269104, + "eval_rewards/chosen": -0.03805762156844139, + "eval_rewards/margins": 0.009771487675607204, + "eval_rewards/rejected": -0.04782910645008087, + "eval_runtime": 136.4881, + "eval_samples_per_second": 14.609, + "eval_steps_per_second": 0.462, "step": 200 }, { "epoch": 0.22024121657052964, - "grad_norm": 2.2864637176453266, - "learning_rate": 1.3801311186847084e-05, - "log_odds_chosen": 0.10374544560909271, - "log_odds_ratio": -0.7170687913894653, - "logits/chosen": -3.0079314708709717, - "logits/rejected": -3.026061773300171, - "logps/chosen": -0.8713214993476868, - "logps/rejected": -0.9376395344734192, - "loss": 0.5683, - "nll_loss": 0.5364366769790649, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.04356607422232628, - "rewards/margins": 0.003315900219604373, - "rewards/rejected": -0.04688197374343872, + "grad_norm": 2.2979124053363367, + "learning_rate": 4.830458915396479e-06, + "log_odds_chosen": 0.14570581912994385, + "log_odds_ratio": -0.7079066038131714, + "logits/chosen": -2.6945998668670654, + "logits/rejected": -2.693587064743042, + "logps/chosen": -0.7664598226547241, + "logps/rejected": -0.8435371518135071, + "loss": 0.5092, + "nll_loss": 0.47726479172706604, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.03832298889756203, + "rewards/margins": 0.003853868693113327, + "rewards/rejected": -0.04217685014009476, "step": 210 }, { "epoch": 0.23072889355007867, - "grad_norm": 2.3833164568305705, - "learning_rate": 1.3483997249264842e-05, - "log_odds_chosen": 0.1967695653438568, - "log_odds_ratio": -0.6872244477272034, - "logits/chosen": -3.066392183303833, - "logits/rejected": -3.0755832195281982, - "logps/chosen": -0.8734294176101685, - "logps/rejected": -0.9998324513435364, - "loss": 0.5608, - "nll_loss": 0.5176301598548889, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.043671466410160065, - "rewards/margins": 0.006320156157016754, - "rewards/rejected": -0.04999162256717682, + "grad_norm": 2.7379211509120998, + "learning_rate": 4.719399037242694e-06, + "log_odds_chosen": 0.2301570177078247, + "log_odds_ratio": -0.6864482164382935, + "logits/chosen": -2.7330780029296875, + "logits/rejected": -2.738948106765747, + "logps/chosen": -0.7607365250587463, + "logps/rejected": -0.902021050453186, + "loss": 0.5025, + "nll_loss": 0.4629960060119629, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.038036830723285675, + "rewards/margins": 0.007064227946102619, + "rewards/rejected": -0.04510105401277542, "step": 220 }, { "epoch": 0.2412165705296277, - "grad_norm": 2.143148051812647, - "learning_rate": 1.3187609467915744e-05, - "log_odds_chosen": 0.2681586444377899, - "log_odds_ratio": -0.669995129108429, - "logits/chosen": -3.0045371055603027, - "logits/rejected": -3.023197889328003, - "logps/chosen": -0.9347988963127136, - "logps/rejected": -1.1079022884368896, - "loss": 0.5715, - "nll_loss": 0.5268279910087585, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.04673994705080986, - "rewards/margins": 0.00865517370402813, - "rewards/rejected": -0.05539512634277344, + "grad_norm": 2.3286309701071986, + "learning_rate": 4.615663313770509e-06, + "log_odds_chosen": 0.30348774790763855, + "log_odds_ratio": -0.6618221402168274, + "logits/chosen": -2.681114673614502, + "logits/rejected": -2.680468797683716, + "logps/chosen": -0.8015350103378296, + "logps/rejected": -0.9835436940193176, + "loss": 0.5126, + "nll_loss": 0.47201746702194214, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.04007675126194954, + "rewards/margins": 0.009100432507693768, + "rewards/rejected": -0.04917718470096588, "step": 230 }, { "epoch": 0.2517042475091767, - "grad_norm": 2.4867634050680865, - "learning_rate": 1.2909944487358057e-05, - "log_odds_chosen": 0.2310989797115326, - "log_odds_ratio": -0.6607853770256042, - "logits/chosen": -3.0592639446258545, - "logits/rejected": -3.0972437858581543, - "logps/chosen": -0.90626060962677, - "logps/rejected": -1.057490587234497, - "loss": 0.5797, - "nll_loss": 0.5543950796127319, + "grad_norm": 2.498755216094707, + "learning_rate": 4.51848057057532e-06, + "log_odds_chosen": 0.28177785873413086, + "log_odds_ratio": -0.6470693945884705, + "logits/chosen": -2.7920804023742676, + "logits/rejected": -2.7859511375427246, + "logps/chosen": -0.7856557965278625, + "logps/rejected": -0.9694973826408386, + "loss": 0.5227, + "nll_loss": 0.49716347455978394, "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.0453130342066288, - "rewards/margins": 0.007561509497463703, - "rewards/rejected": -0.05287454277276993, + "rewards/chosen": -0.03928279131650925, + "rewards/margins": 0.009192083030939102, + "rewards/rejected": -0.04847487062215805, "step": 240 }, { "epoch": 0.26219192448872575, - "grad_norm": 2.2846935841220364, - "learning_rate": 1.2649110640673518e-05, - "log_odds_chosen": 0.24984344840049744, - "log_odds_ratio": -0.6764962077140808, - "logits/chosen": -3.0678868293762207, - "logits/rejected": -3.0685126781463623, - "logps/chosen": -0.8884732127189636, - "logps/rejected": -1.025420904159546, - "loss": 0.5498, - "nll_loss": 0.5219429731369019, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.0444236658513546, - "rewards/margins": 0.006847388111054897, - "rewards/rejected": -0.05127105116844177, + "grad_norm": 2.5700569103186335, + "learning_rate": 4.427188724235731e-06, + "log_odds_chosen": 0.2942022681236267, + "log_odds_ratio": -0.6677531003952026, + "logits/chosen": -2.761166572570801, + "logits/rejected": -2.763213634490967, + "logps/chosen": -0.77226322889328, + "logps/rejected": -0.9335973858833313, + "loss": 0.4963, + "nll_loss": 0.4665839672088623, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.03861316293478012, + "rewards/margins": 0.008066706359386444, + "rewards/rejected": -0.04667987301945686, "step": 250 }, { "epoch": 0.2726796014682748, - "grad_norm": 2.3800633619201523, - "learning_rate": 1.2403473458920845e-05, - "log_odds_chosen": 0.2426706850528717, - "log_odds_ratio": -0.6691194772720337, - "logits/chosen": -3.0950028896331787, - "logits/rejected": -3.112684488296509, - "logps/chosen": -0.8879591822624207, - "logps/rejected": -1.042834997177124, - "loss": 0.5302, - "nll_loss": 0.45519179105758667, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04439795762300491, - "rewards/margins": 0.00774379214271903, - "rewards/rejected": -0.05214175581932068, + "grad_norm": 2.5460185754878415, + "learning_rate": 4.341215710622295e-06, + "log_odds_chosen": 0.31073135137557983, + "log_odds_ratio": -0.6524397134780884, + "logits/chosen": -2.721327304840088, + "logits/rejected": -2.711200475692749, + "logps/chosen": -0.7779613137245178, + "logps/rejected": -0.9653064608573914, + "loss": 0.478, + "nll_loss": 0.40727710723876953, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.038898058235645294, + "rewards/margins": 0.009367265738546848, + "rewards/rejected": -0.048265330493450165, "step": 260 }, { "epoch": 0.2831672784478238, - "grad_norm": 2.3697586961370027, - "learning_rate": 1.2171612389003691e-05, - "log_odds_chosen": 0.23119862377643585, - "log_odds_ratio": -0.6756153702735901, - "logits/chosen": -3.113889455795288, - "logits/rejected": -3.157740354537964, - "logps/chosen": -0.9564247131347656, - "logps/rejected": -1.1352105140686035, - "loss": 0.5654, - "nll_loss": 0.5433498024940491, - "rewards/accuracies": 0.512499988079071, - "rewards/chosen": -0.04782123863697052, - "rewards/margins": 0.008939290419220924, - "rewards/rejected": -0.056760527193546295, + "grad_norm": 2.63045792619979, + "learning_rate": 4.260064336151291e-06, + "log_odds_chosen": 0.2511529326438904, + "log_odds_ratio": -0.6676173806190491, + "logits/chosen": -2.757246255874634, + "logits/rejected": -2.7497289180755615, + "logps/chosen": -0.8231350779533386, + "logps/rejected": -0.9868103265762329, + "loss": 0.5115, + "nll_loss": 0.48606061935424805, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04115675389766693, + "rewards/margins": 0.008183758705854416, + "rewards/rejected": -0.04934050887823105, "step": 270 }, { "epoch": 0.29365495542737285, - "grad_norm": 1.9757109026566833, - "learning_rate": 1.1952286093343936e-05, - "log_odds_chosen": 0.25132113695144653, - "log_odds_ratio": -0.6663895845413208, - "logits/chosen": -3.1407101154327393, - "logits/rejected": -3.1832191944122314, - "logps/chosen": -0.9308640360832214, - "logps/rejected": -1.087449312210083, - "loss": 0.5429, - "nll_loss": 0.4785974621772766, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.04654319956898689, - "rewards/margins": 0.007829269394278526, - "rewards/rejected": -0.05437246710062027, + "grad_norm": 2.074128745122309, + "learning_rate": 4.183300132670378e-06, + "log_odds_chosen": 0.27424556016921997, + "log_odds_ratio": -0.6629655361175537, + "logits/chosen": -2.694702625274658, + "logits/rejected": -2.695335626602173, + "logps/chosen": -0.8050632476806641, + "logps/rejected": -0.9577094912528992, + "loss": 0.4891, + "nll_loss": 0.4250563681125641, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.0402531661093235, + "rewards/margins": 0.007632312830537558, + "rewards/rejected": -0.0478854700922966, "step": 280 }, { "epoch": 0.30414263240692185, - "grad_norm": 2.7308236297418427, - "learning_rate": 1.1744404390294071e-05, - "log_odds_chosen": 0.35913094878196716, - "log_odds_ratio": -0.6187662482261658, - "logits/chosen": -3.0944533348083496, - "logits/rejected": -3.1177055835723877, - "logps/chosen": -0.8355825543403625, - "logps/rejected": -1.0572632551193237, - "loss": 0.5568, - "nll_loss": 0.48925265669822693, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.041779130697250366, - "rewards/margins": 0.011084041558206081, - "rewards/rejected": -0.052863169461488724, + "grad_norm": 2.818316169672816, + "learning_rate": 4.110541536602925e-06, + "log_odds_chosen": 0.40846139192581177, + "log_odds_ratio": -0.6159543991088867, + "logits/chosen": -2.689415216445923, + "logits/rejected": -2.6885359287261963, + "logps/chosen": -0.729388952255249, + "logps/rejected": -0.9667993783950806, + "loss": 0.5032, + "nll_loss": 0.43972086906433105, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.03646944463253021, + "rewards/margins": 0.011870523914694786, + "rewards/rejected": -0.04833997040987015, "step": 290 }, { "epoch": 0.3146303093864709, - "grad_norm": 2.472653160364779, - "learning_rate": 1.1547005383792517e-05, - "log_odds_chosen": 0.2816540598869324, - "log_odds_ratio": -0.6775935888290405, - "logits/chosen": -3.092194080352783, - "logits/rejected": -3.1420485973358154, - "logps/chosen": -0.8778301477432251, - "logps/rejected": -1.0717580318450928, - "loss": 0.5819, - "nll_loss": 0.5100258588790894, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.04389150068163872, - "rewards/margins": 0.009696396067738533, - "rewards/rejected": -0.0535879023373127, + "grad_norm": 2.6319487345124495, + "learning_rate": 4.0414518843273805e-06, + "log_odds_chosen": 0.2938074767589569, + "log_odds_ratio": -0.675439178943634, + "logits/chosen": -2.746011257171631, + "logits/rejected": -2.719851016998291, + "logps/chosen": -0.7730266451835632, + "logps/rejected": -0.9800483584403992, + "loss": 0.5265, + "nll_loss": 0.45733898878097534, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.03865132853388786, + "rewards/margins": 0.01035108882933855, + "rewards/rejected": -0.04900241643190384, "step": 300 }, { "epoch": 0.3146303093864709, - "eval_log_odds_chosen": 0.28298813104629517, - "eval_log_odds_ratio": -0.6463662981987, - "eval_logits/chosen": -3.1391000747680664, - "eval_logits/rejected": -3.1424100399017334, - "eval_logps/chosen": -0.8770027756690979, - "eval_logps/rejected": -1.0619502067565918, - "eval_loss": 0.5552015900611877, - "eval_nll_loss": 0.5201771259307861, - "eval_rewards/accuracies": 0.6289682388305664, - "eval_rewards/chosen": -0.043850142508745193, - "eval_rewards/margins": 0.00924737099558115, - "eval_rewards/rejected": -0.05309751257300377, - "eval_runtime": 141.1002, - "eval_samples_per_second": 14.132, - "eval_steps_per_second": 0.446, + "eval_log_odds_chosen": 0.32782861590385437, + "eval_log_odds_ratio": -0.6374222040176392, + "eval_logits/chosen": -2.75937819480896, + "eval_logits/rejected": -2.731720209121704, + "eval_logps/chosen": -0.7587753534317017, + "eval_logps/rejected": -0.9572128653526306, + "eval_loss": 0.5012248754501343, + "eval_nll_loss": 0.4652516841888428, + "eval_rewards/accuracies": 0.6329365372657776, + "eval_rewards/chosen": -0.037938766181468964, + "eval_rewards/margins": 0.009921879507601261, + "eval_rewards/rejected": -0.04786064475774765, + "eval_runtime": 143.3287, + "eval_samples_per_second": 13.912, + "eval_steps_per_second": 0.44, "step": 300 }, { "epoch": 0.3251179863660199, - "grad_norm": 2.038557141198459, - "learning_rate": 1.1359236684941297e-05, - "log_odds_chosen": 0.1998841017484665, - "log_odds_ratio": -0.6875525116920471, - "logits/chosen": -3.0676262378692627, - "logits/rejected": -3.07094407081604, - "logps/chosen": -0.9092122912406921, - "logps/rejected": -1.0280473232269287, - "loss": 0.5844, - "nll_loss": 0.5417822599411011, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04546061158180237, - "rewards/margins": 0.005941747687757015, - "rewards/rejected": -0.051402367651462555, + "grad_norm": 2.303425231373124, + "learning_rate": 3.975732839729454e-06, + "log_odds_chosen": 0.23192088305950165, + "log_odds_ratio": -0.6818796396255493, + "logits/chosen": -2.7074503898620605, + "logits/rejected": -2.673837661743164, + "logps/chosen": -0.7971353530883789, + "logps/rejected": -0.9301053285598755, + "loss": 0.5302, + "nll_loss": 0.48708105087280273, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.039856769144535065, + "rewards/margins": 0.006648494862020016, + "rewards/rejected": -0.046505264937877655, "step": 310 }, { "epoch": 0.33560566334556896, - "grad_norm": 2.262270965184679, - "learning_rate": 1.118033988749895e-05, - "log_odds_chosen": 0.2705835700035095, - "log_odds_ratio": -0.6538633108139038, - "logits/chosen": -3.127427339553833, - "logits/rejected": -3.142587661743164, - "logps/chosen": -0.9069059491157532, - "logps/rejected": -1.0691728591918945, - "loss": 0.5242, - "nll_loss": 0.4929099977016449, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.04534530267119408, - "rewards/margins": 0.008113345131278038, - "rewards/rejected": -0.05345864221453667, + "grad_norm": 2.5118343787899735, + "learning_rate": 3.913118960624632e-06, + "log_odds_chosen": 0.3314226567745209, + "log_odds_ratio": -0.6417438387870789, + "logits/chosen": -2.7188448905944824, + "logits/rejected": -2.7005674839019775, + "logps/chosen": -0.7902022004127502, + "logps/rejected": -0.9723421335220337, + "loss": 0.4738, + "nll_loss": 0.44032588601112366, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.03951011225581169, + "rewards/margins": 0.009106996469199657, + "rewards/rejected": -0.048617102205753326, "step": 320 }, { "epoch": 0.34609334032511796, - "grad_norm": 2.4122464498293623, - "learning_rate": 1.1009637651263608e-05, - "log_odds_chosen": 0.23684370517730713, - "log_odds_ratio": -0.7030869722366333, - "logits/chosen": -3.0819878578186035, - "logits/rejected": -3.1327972412109375, - "logps/chosen": -0.9059860110282898, - "logps/rejected": -1.0601646900177002, - "loss": 0.5547, - "nll_loss": 0.5366790890693665, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.04529929906129837, - "rewards/margins": 0.007708935532718897, - "rewards/rejected": -0.05300822854042053, + "grad_norm": 2.490550595224948, + "learning_rate": 3.853373177942262e-06, + "log_odds_chosen": 0.29606467485427856, + "log_odds_ratio": -0.6935312151908875, + "logits/chosen": -2.6737678050994873, + "logits/rejected": -2.6778550148010254, + "logps/chosen": -0.7957532405853271, + "logps/rejected": -0.9609133005142212, + "loss": 0.5015, + "nll_loss": 0.48406466841697693, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.039787657558918, + "rewards/margins": 0.008258005604147911, + "rewards/rejected": -0.04804566502571106, "step": 330 }, { "epoch": 0.356581017304667, - "grad_norm": 2.3793498474146535, - "learning_rate": 1.0846522890932809e-05, - "log_odds_chosen": 0.18786638975143433, - "log_odds_ratio": -0.6986292004585266, - "logits/chosen": -3.0940568447113037, - "logits/rejected": -3.1512954235076904, - "logps/chosen": -0.8602282404899597, - "logps/rejected": -0.9875131845474243, - "loss": 0.5702, - "nll_loss": 0.5145949125289917, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.043011412024497986, - "rewards/margins": 0.0063642458990216255, - "rewards/rejected": -0.049375660717487335, + "grad_norm": 2.455512863241718, + "learning_rate": 3.796283011826483e-06, + "log_odds_chosen": 0.2068498581647873, + "log_odds_ratio": -0.6988531947135925, + "logits/chosen": -2.656428575515747, + "logits/rejected": -2.67673659324646, + "logps/chosen": -0.7645977139472961, + "logps/rejected": -0.9020528793334961, + "loss": 0.5161, + "nll_loss": 0.46574801206588745, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.038229890167713165, + "rewards/margins": 0.006872760597616434, + "rewards/rejected": -0.04510264843702316, "step": 340 }, { "epoch": 0.36706869428421607, - "grad_norm": 2.3420960793915517, - "learning_rate": 1.0690449676496977e-05, - "log_odds_chosen": 0.2689460217952728, - "log_odds_ratio": -0.6845754384994507, - "logits/chosen": -3.1326746940612793, - "logits/rejected": -3.1552205085754395, - "logps/chosen": -0.8725005984306335, - "logps/rejected": -1.0421197414398193, - "loss": 0.5462, - "nll_loss": 0.5172144174575806, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.043625034391880035, - "rewards/margins": 0.008480949327349663, - "rewards/rejected": -0.05210598558187485, + "grad_norm": 2.3906859020418243, + "learning_rate": 3.7416573867739415e-06, + "log_odds_chosen": 0.32536062598228455, + "log_odds_ratio": -0.6628221273422241, + "logits/chosen": -2.7076945304870605, + "logits/rejected": -2.6763672828674316, + "logps/chosen": -0.7698060274124146, + "logps/rejected": -0.9597750902175903, + "loss": 0.4925, + "nll_loss": 0.468719482421875, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.03849030286073685, + "rewards/margins": 0.009498453699052334, + "rewards/rejected": -0.047988757491111755, "step": 350 }, { "epoch": 0.37755637126376507, - "grad_norm": 2.014589871880686, - "learning_rate": 1.0540925533894598e-05, - "log_odds_chosen": 0.37792789936065674, - "log_odds_ratio": -0.6156649589538574, - "logits/chosen": -3.010802745819092, - "logits/rejected": -3.042652130126953, - "logps/chosen": -0.8830682635307312, - "logps/rejected": -1.118240237236023, - "loss": 0.5497, - "nll_loss": 0.5099813938140869, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.04415341466665268, - "rewards/margins": 0.011758595705032349, - "rewards/rejected": -0.05591200664639473, + "grad_norm": 2.1635991647413824, + "learning_rate": 3.689323936863109e-06, + "log_odds_chosen": 0.4051761031150818, + "log_odds_ratio": -0.6067623496055603, + "logits/chosen": -2.6350862979888916, + "logits/rejected": -2.635108232498169, + "logps/chosen": -0.768888533115387, + "logps/rejected": -1.0009427070617676, + "loss": 0.5009, + "nll_loss": 0.45801717042922974, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.03844442963600159, + "rewards/margins": 0.011602705344557762, + "rewards/rejected": -0.0500471368432045, "step": 360 }, { "epoch": 0.3880440482433141, - "grad_norm": 2.0494786838330903, - "learning_rate": 1.0397504898200728e-05, - "log_odds_chosen": 0.37991228699684143, - "log_odds_ratio": -0.6151097416877747, - "logits/chosen": -3.071289539337158, - "logits/rejected": -3.0840888023376465, - "logps/chosen": -0.863991379737854, - "logps/rejected": -1.1161118745803833, - "loss": 0.5195, - "nll_loss": 0.4998775124549866, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.0431995615363121, - "rewards/margins": 0.012606029398739338, - "rewards/rejected": -0.055805593729019165, + "grad_norm": 2.3887899088845037, + "learning_rate": 3.6391267143702543e-06, + "log_odds_chosen": 0.4100113809108734, + "log_odds_ratio": -0.6096552014350891, + "logits/chosen": -2.707559108734131, + "logits/rejected": -2.6750998497009277, + "logps/chosen": -0.7636415362358093, + "logps/rejected": -1.0189807415008545, + "loss": 0.4701, + "nll_loss": 0.45124197006225586, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.038182083517313004, + "rewards/margins": 0.012766959145665169, + "rewards/rejected": -0.050949037075042725, "step": 370 }, { "epoch": 0.3985317252228631, - "grad_norm": 2.3440751758332294, - "learning_rate": 1.0259783520851543e-05, - "log_odds_chosen": 0.4805373549461365, - "log_odds_ratio": -0.5845500230789185, - "logits/chosen": -3.1311728954315186, - "logits/rejected": -3.168400287628174, - "logps/chosen": -0.8546767234802246, - "logps/rejected": -1.1352304220199585, - "loss": 0.5371, - "nll_loss": 0.5167530179023743, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.04273384064435959, - "rewards/margins": 0.014027683064341545, - "rewards/rejected": -0.056761521846055984, + "grad_norm": 2.5794228625801225, + "learning_rate": 3.5909242322980396e-06, + "log_odds_chosen": 0.4701065421104431, + "log_odds_ratio": -0.5877975821495056, + "logits/chosen": -2.7147293090820312, + "logits/rejected": -2.700373888015747, + "logps/chosen": -0.7640558481216431, + "logps/rejected": -1.0210450887680054, + "loss": 0.4866, + "nll_loss": 0.4662235379219055, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.03820279613137245, + "rewards/margins": 0.012849463149905205, + "rewards/rejected": -0.05105225369334221, "step": 380 }, { "epoch": 0.4090194022024122, - "grad_norm": 2.50155675830033, - "learning_rate": 1.0127393670836667e-05, - "log_odds_chosen": 0.0912429466843605, - "log_odds_ratio": -0.7177212238311768, - "logits/chosen": -3.1054975986480713, - "logits/rejected": -3.1308093070983887, - "logps/chosen": -0.9102872014045715, - "logps/rejected": -0.9754246473312378, - "loss": 0.5574, - "nll_loss": 0.5331951379776001, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.045514363795518875, - "rewards/margins": 0.003256872994825244, - "rewards/rejected": -0.04877123609185219, + "grad_norm": 2.2524505662506007, + "learning_rate": 3.544587784792833e-06, + "log_odds_chosen": 0.15358106791973114, + "log_odds_ratio": -0.6960343718528748, + "logits/chosen": -2.6469695568084717, + "logits/rejected": -2.6523191928863525, + "logps/chosen": -0.8073819875717163, + "logps/rejected": -0.9069193005561829, + "loss": 0.5052, + "nll_loss": 0.48589834570884705, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.04036910459399223, + "rewards/margins": 0.004976863972842693, + "rewards/rejected": -0.0453459694981575, "step": 390 }, { "epoch": 0.4195070791819612, - "grad_norm": 2.027467517514936, - "learning_rate": 1e-05, - "log_odds_chosen": 0.2633103132247925, - "log_odds_ratio": -0.6879682540893555, - "logits/chosen": -3.0087058544158936, - "logits/rejected": -3.0386600494384766, - "logps/chosen": -0.9468951225280762, - "logps/rejected": -1.1236045360565186, - "loss": 0.5738, - "nll_loss": 0.527585506439209, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.04734475538134575, - "rewards/margins": 0.008835467509925365, - "rewards/rejected": -0.05618022754788399, + "grad_norm": 2.151733711875547, + "learning_rate": 3.5e-06, + "log_odds_chosen": 0.3257240355014801, + "log_odds_ratio": -0.6618676781654358, + "logits/chosen": -2.5556883811950684, + "logits/rejected": -2.5709598064422607, + "logps/chosen": -0.8370679616928101, + "logps/rejected": -1.0387462377548218, + "loss": 0.5194, + "nll_loss": 0.471977561712265, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.0418534018099308, + "rewards/margins": 0.010083912871778011, + "rewards/rejected": -0.05193731188774109, "step": 400 }, { "epoch": 0.4195070791819612, - "eval_log_odds_chosen": 0.2960740923881531, - "eval_log_odds_ratio": -0.6521593332290649, - "eval_logits/chosen": -3.1019551753997803, - "eval_logits/rejected": -3.1026368141174316, - "eval_logps/chosen": -0.8433709740638733, - "eval_logps/rejected": -1.0346297025680542, - "eval_loss": 0.5411269664764404, - "eval_nll_loss": 0.5047088265419006, - "eval_rewards/accuracies": 0.6289682388305664, - "eval_rewards/chosen": -0.042168550193309784, - "eval_rewards/margins": 0.00956293661147356, - "eval_rewards/rejected": -0.05173148587346077, - "eval_runtime": 135.94, - "eval_samples_per_second": 14.668, - "eval_steps_per_second": 0.463, + "eval_log_odds_chosen": 0.3606604039669037, + "eval_log_odds_ratio": -0.6283872127532959, + "eval_logits/chosen": -2.6973965167999268, + "eval_logits/rejected": -2.664045572280884, + "eval_logps/chosen": -0.7416918277740479, + "eval_logps/rejected": -0.9558579921722412, + "eval_loss": 0.4911641776561737, + "eval_nll_loss": 0.455983966588974, + "eval_rewards/accuracies": 0.6428571343421936, + "eval_rewards/chosen": -0.03708459436893463, + "eval_rewards/margins": 0.010708308778703213, + "eval_rewards/rejected": -0.04779290035367012, + "eval_runtime": 137.3177, + "eval_samples_per_second": 14.521, + "eval_steps_per_second": 0.459, "step": 400 }, { "epoch": 0.4299947561615102, - "grad_norm": 2.077556227084633, - "learning_rate": 9.877295966495898e-06, - "log_odds_chosen": 0.1433972865343094, - "log_odds_ratio": -0.7417241930961609, - "logits/chosen": -3.147104024887085, - "logits/rejected": -3.1611135005950928, - "logps/chosen": -0.8865131139755249, - "logps/rejected": -0.9979325532913208, - "loss": 0.5454, - "nll_loss": 0.4825812876224518, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.044325657188892365, - "rewards/margins": 0.005570969078689814, - "rewards/rejected": -0.04989662766456604, + "grad_norm": 2.234889439349526, + "learning_rate": 3.457053588273564e-06, + "log_odds_chosen": 0.22749297320842743, + "log_odds_ratio": -0.6977051496505737, + "logits/chosen": -2.6853058338165283, + "logits/rejected": -2.646806001663208, + "logps/chosen": -0.7714927792549133, + "logps/rejected": -0.9221086502075195, + "loss": 0.4951, + "nll_loss": 0.43608254194259644, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.038574643433094025, + "rewards/margins": 0.00753078842535615, + "rewards/rejected": -0.04610542953014374, "step": 410 }, { "epoch": 0.4404824331410593, - "grad_norm": 1.9177361456178337, - "learning_rate": 9.759000729485331e-06, - "log_odds_chosen": 0.2965100407600403, - "log_odds_ratio": -0.6552795171737671, - "logits/chosen": -3.065213203430176, - "logits/rejected": -3.106889247894287, - "logps/chosen": -0.8926699757575989, - "logps/rejected": -1.073974609375, - "loss": 0.5349, - "nll_loss": 0.47521886229515076, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04463350027799606, - "rewards/margins": 0.009065226651728153, - "rewards/rejected": -0.05369872972369194, + "grad_norm": 2.0285171917411766, + "learning_rate": 3.4156502553198657e-06, + "log_odds_chosen": 0.3810080885887146, + "log_odds_ratio": -0.6389856338500977, + "logits/chosen": -2.6045069694519043, + "logits/rejected": -2.621366024017334, + "logps/chosen": -0.7517096996307373, + "logps/rejected": -0.9603899121284485, + "loss": 0.4852, + "nll_loss": 0.42949992418289185, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.037585485726594925, + "rewards/margins": 0.01043400727212429, + "rewards/rejected": -0.048019491136074066, "step": 420 }, { "epoch": 0.4509701101206083, - "grad_norm": 2.2675621915351503, - "learning_rate": 9.644856443408244e-06, - "log_odds_chosen": 0.29174235463142395, - "log_odds_ratio": -0.6506129503250122, - "logits/chosen": -3.075723648071289, - "logits/rejected": -3.0862226486206055, - "logps/chosen": -0.8427901268005371, - "logps/rejected": -1.0184295177459717, - "loss": 0.5557, - "nll_loss": 0.5429800152778625, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.04213951155543327, - "rewards/margins": 0.008781969547271729, - "rewards/rejected": -0.050921481102705, + "grad_norm": 2.508500818711511, + "learning_rate": 3.375699755192885e-06, + "log_odds_chosen": 0.3060067594051361, + "log_odds_ratio": -0.6428481936454773, + "logits/chosen": -2.6315762996673584, + "logits/rejected": -2.614450216293335, + "logps/chosen": -0.7450464367866516, + "logps/rejected": -0.9214862585067749, + "loss": 0.5054, + "nll_loss": 0.4888521730899811, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.03725232556462288, + "rewards/margins": 0.008821990340948105, + "rewards/rejected": -0.046074315905570984, "step": 430 }, { "epoch": 0.46145778710015734, - "grad_norm": 2.048479923586714, - "learning_rate": 9.534625892455923e-06, - "log_odds_chosen": 0.2715272009372711, - "log_odds_ratio": -0.6504871249198914, - "logits/chosen": -3.114889144897461, - "logits/rejected": -3.1430869102478027, - "logps/chosen": -0.8674638867378235, - "logps/rejected": -1.0402857065200806, - "loss": 0.5502, - "nll_loss": 0.5185979604721069, + "grad_norm": 2.209049048242546, + "learning_rate": 3.337119062359573e-06, + "log_odds_chosen": 0.2785058617591858, + "log_odds_ratio": -0.6411095857620239, + "logits/chosen": -2.6460564136505127, + "logits/rejected": -2.6254661083221436, + "logps/chosen": -0.7616952061653137, + "logps/rejected": -0.9235254526138306, + "loss": 0.5024, + "nll_loss": 0.46845754981040955, "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04337319731712341, - "rewards/margins": 0.008641095831990242, - "rewards/rejected": -0.05201428383588791, + "rewards/chosen": -0.038084764033555984, + "rewards/margins": 0.008091514930129051, + "rewards/rejected": -0.04617627337574959, "step": 440 }, { "epoch": 0.47194546407970633, - "grad_norm": 1.9700303764265876, - "learning_rate": 9.428090415820635e-06, - "log_odds_chosen": 0.37898144125938416, - "log_odds_ratio": -0.6548101305961609, - "logits/chosen": -3.141404390335083, - "logits/rejected": -3.1785435676574707, - "logps/chosen": -0.8289934396743774, - "logps/rejected": -1.080649733543396, - "loss": 0.5278, - "nll_loss": 0.49574679136276245, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.04144967347383499, - "rewards/margins": 0.01258282084017992, - "rewards/rejected": -0.05403248593211174, + "grad_norm": 2.0098987626040574, + "learning_rate": 3.2998316455372222e-06, + "log_odds_chosen": 0.37491756677627563, + "log_odds_ratio": -0.648253321647644, + "logits/chosen": -2.6618144512176514, + "logits/rejected": -2.643500566482544, + "logps/chosen": -0.7266156673431396, + "logps/rejected": -0.9600238800048828, + "loss": 0.4828, + "nll_loss": 0.4462718069553375, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.03633078932762146, + "rewards/margins": 0.01167040504515171, + "rewards/rejected": -0.04800119251012802, "step": 450 }, { "epoch": 0.4824331410592554, - "grad_norm": 2.1444885294890796, - "learning_rate": 9.325048082403139e-06, - "log_odds_chosen": 0.21225424110889435, - "log_odds_ratio": -0.6999707221984863, - "logits/chosen": -3.110089063644409, - "logits/rejected": -3.1592323780059814, - "logps/chosen": -0.947162926197052, - "logps/rejected": -1.1105449199676514, - "loss": 0.5315, - "nll_loss": 0.5339683890342712, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.04735814779996872, - "rewards/margins": 0.008169097825884819, - "rewards/rejected": -0.05552724748849869, + "grad_norm": 2.3085421987869785, + "learning_rate": 3.263766828841098e-06, + "log_odds_chosen": 0.2140667885541916, + "log_odds_ratio": -0.6971082091331482, + "logits/chosen": -2.6545071601867676, + "logits/rejected": -2.6458332538604736, + "logps/chosen": -0.8354724049568176, + "logps/rejected": -0.9942563772201538, + "loss": 0.4871, + "nll_loss": 0.48358869552612305, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.04177362099289894, + "rewards/margins": 0.007939198985695839, + "rewards/rejected": -0.04971281811594963, "step": 460 }, { "epoch": 0.4929208180388044, - "grad_norm": 2.1649660190560613, - "learning_rate": 9.225312080288851e-06, - "log_odds_chosen": 0.2549912929534912, - "log_odds_ratio": -0.6857655644416809, - "logits/chosen": -3.0928080081939697, - "logits/rejected": -3.1287431716918945, - "logps/chosen": -0.8865912556648254, - "logps/rejected": -1.050857663154602, - "loss": 0.5421, - "nll_loss": 0.5101572275161743, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.044329557567834854, - "rewards/margins": 0.008213317021727562, - "rewards/rejected": -0.052542876452207565, + "grad_norm": 2.58413257051123, + "learning_rate": 3.2288592281010976e-06, + "log_odds_chosen": 0.30273735523223877, + "log_odds_ratio": -0.6744717359542847, + "logits/chosen": -2.6462035179138184, + "logits/rejected": -2.6307010650634766, + "logps/chosen": -0.7793454527854919, + "logps/rejected": -0.9655405879020691, + "loss": 0.4932, + "nll_loss": 0.4597246050834656, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.038967277854681015, + "rewards/margins": 0.009309760294854641, + "rewards/rejected": -0.048277031630277634, "step": 470 }, { "epoch": 0.5034084950183534, - "grad_norm": 1.89898044344756, - "learning_rate": 9.12870929175277e-06, - "log_odds_chosen": 0.18933558464050293, - "log_odds_ratio": -0.7031041383743286, - "logits/chosen": -3.1588873863220215, - "logits/rejected": -3.1968955993652344, - "logps/chosen": -0.8558489680290222, - "logps/rejected": -0.980047881603241, - "loss": 0.5174, - "nll_loss": 0.5121264457702637, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.04279245063662529, - "rewards/margins": 0.006209943443536758, - "rewards/rejected": -0.04900239408016205, + "grad_norm": 2.275276830168767, + "learning_rate": 3.195048252113469e-06, + "log_odds_chosen": 0.25159093737602234, + "log_odds_ratio": -0.6775428056716919, + "logits/chosen": -2.6590356826782227, + "logits/rejected": -2.649465560913086, + "logps/chosen": -0.7499970197677612, + "logps/rejected": -0.8869997262954712, + "loss": 0.4713, + "nll_loss": 0.4634857177734375, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.03749985247850418, + "rewards/margins": 0.006850133184343576, + "rewards/rejected": -0.04434997960925102, "step": 480 }, { "epoch": 0.5138961719979025, - "grad_norm": 1.9212510076087481, - "learning_rate": 9.035079029052514e-06, - "log_odds_chosen": 0.23131313920021057, - "log_odds_ratio": -0.6693936586380005, - "logits/chosen": -3.094421625137329, - "logits/rejected": -3.1039950847625732, - "logps/chosen": -0.9284296035766602, - "logps/rejected": -1.0470894575119019, - "loss": 0.5391, - "nll_loss": 0.5019217729568481, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04642148315906525, - "rewards/margins": 0.005932994186878204, - "rewards/rejected": -0.05235447734594345, + "grad_norm": 2.134835184101472, + "learning_rate": 3.1622776601683796e-06, + "log_odds_chosen": 0.2592507004737854, + "log_odds_ratio": -0.6677337884902954, + "logits/chosen": -2.638939619064331, + "logits/rejected": -2.5990116596221924, + "logps/chosen": -0.8319272994995117, + "logps/rejected": -0.9564205408096313, + "loss": 0.4941, + "nll_loss": 0.4587552547454834, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.041596364229917526, + "rewards/margins": 0.006224661134183407, + "rewards/rejected": -0.04782102257013321, "step": 490 }, { "epoch": 0.5243838489774515, - "grad_norm": 2.197524211966931, - "learning_rate": 8.94427190999916e-06, - "log_odds_chosen": 0.2233821153640747, - "log_odds_ratio": -0.6923887729644775, - "logits/chosen": -3.0647079944610596, - "logits/rejected": -3.0620505809783936, - "logps/chosen": -0.8755196332931519, - "logps/rejected": -1.0028659105300903, - "loss": 0.5478, - "nll_loss": 0.5219477415084839, + "grad_norm": 2.3707837495895494, + "learning_rate": 3.1304951684997056e-06, + "log_odds_chosen": 0.25932976603507996, + "log_odds_ratio": -0.6785644292831421, + "logits/chosen": -2.690480947494507, + "logits/rejected": -2.6417829990386963, + "logps/chosen": -0.7875474095344543, + "logps/rejected": -0.9345542788505554, + "loss": 0.5008, + "nll_loss": 0.47637850046157837, "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.04377598315477371, - "rewards/margins": 0.0063673085533082485, - "rewards/rejected": -0.0501432940363884, + "rewards/chosen": -0.0393773689866066, + "rewards/margins": 0.007350355386734009, + "rewards/rejected": -0.04672772437334061, "step": 500 }, { "epoch": 0.5243838489774515, - "eval_log_odds_chosen": 0.33266139030456543, - "eval_log_odds_ratio": -0.6382430791854858, - "eval_logits/chosen": -3.028609275817871, - "eval_logits/rejected": -3.0259969234466553, - "eval_logps/chosen": -0.8414799571037292, - "eval_logps/rejected": -1.0509231090545654, - "eval_loss": 0.5319445133209229, - "eval_nll_loss": 0.49702468514442444, - "eval_rewards/accuracies": 0.6289682388305664, - "eval_rewards/chosen": -0.04207399860024452, - "eval_rewards/margins": 0.010472159832715988, - "eval_rewards/rejected": -0.05254615470767021, - "eval_runtime": 136.7326, - "eval_samples_per_second": 14.583, - "eval_steps_per_second": 0.461, + "eval_log_odds_chosen": 0.3873175382614136, + "eval_log_odds_ratio": -0.6208989024162292, + "eval_logits/chosen": -2.62943434715271, + "eval_logits/rejected": -2.5956878662109375, + "eval_logps/chosen": -0.7454984188079834, + "eval_logps/rejected": -0.9786220192909241, + "eval_loss": 0.4847143888473511, + "eval_nll_loss": 0.44987979531288147, + "eval_rewards/accuracies": 0.6507936716079712, + "eval_rewards/chosen": -0.03727491945028305, + "eval_rewards/margins": 0.011656176298856735, + "eval_rewards/rejected": -0.04893109202384949, + "eval_runtime": 138.4279, + "eval_samples_per_second": 14.405, + "eval_steps_per_second": 0.455, "step": 500 }, { "epoch": 0.5348715259570005, - "grad_norm": 1.7639475332504142, - "learning_rate": 8.856148855400955e-06, - "log_odds_chosen": 0.29167047142982483, - "log_odds_ratio": -0.648201048374176, - "logits/chosen": -3.0114383697509766, - "logits/rejected": -3.024693250656128, - "logps/chosen": -0.841100811958313, - "logps/rejected": -1.0192333459854126, - "loss": 0.5263, - "nll_loss": 0.5350626111030579, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04205504059791565, - "rewards/margins": 0.00890662893652916, - "rewards/rejected": -0.05096167325973511, + "grad_norm": 1.9535668554599182, + "learning_rate": 3.0996520993903337e-06, + "log_odds_chosen": 0.32442158460617065, + "log_odds_ratio": -0.6475775837898254, + "logits/chosen": -2.6708967685699463, + "logits/rejected": -2.649402141571045, + "logps/chosen": -0.7484665513038635, + "logps/rejected": -0.9413715600967407, + "loss": 0.4786, + "nll_loss": 0.48495978116989136, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.03742332383990288, + "rewards/margins": 0.00964525155723095, + "rewards/rejected": -0.047068577259778976, "step": 510 }, { "epoch": 0.5453592029365496, - "grad_norm": 1.6884098835310988, - "learning_rate": 8.770580193070294e-06, - "log_odds_chosen": 0.24579331278800964, - "log_odds_ratio": -0.6814862489700317, - "logits/chosen": -3.016019582748413, - "logits/rejected": -3.0255684852600098, - "logps/chosen": -0.9082791209220886, - "logps/rejected": -1.0769283771514893, - "loss": 0.5369, - "nll_loss": 0.47502464056015015, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04541395604610443, - "rewards/margins": 0.008432453498244286, - "rewards/rejected": -0.053846411406993866, + "grad_norm": 1.9645096615425393, + "learning_rate": 3.069703067574602e-06, + "log_odds_chosen": 0.2872227430343628, + "log_odds_ratio": -0.6613379716873169, + "logits/chosen": -2.6058475971221924, + "logits/rejected": -2.577051877975464, + "logps/chosen": -0.8017369508743286, + "logps/rejected": -0.9904945492744446, + "loss": 0.4897, + "nll_loss": 0.4331512451171875, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04008684307336807, + "rewards/margins": 0.009437882341444492, + "rewards/rejected": -0.04952472820878029, "step": 520 }, { "epoch": 0.5558468799160986, - "grad_norm": 1.7588436164574766, - "learning_rate": 8.687444855261389e-06, - "log_odds_chosen": 0.39766445755958557, - "log_odds_ratio": -0.6521557569503784, - "logits/chosen": -3.0906691551208496, - "logits/rejected": -3.1090755462646484, - "logps/chosen": -0.8297191858291626, - "logps/rejected": -1.1049801111221313, - "loss": 0.5364, - "nll_loss": 0.450814813375473, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.04148596152663231, - "rewards/margins": 0.01376304216682911, - "rewards/rejected": -0.05524900555610657, + "grad_norm": 1.9526548988230616, + "learning_rate": 3.0406056993414858e-06, + "log_odds_chosen": 0.42971426248550415, + "log_odds_ratio": -0.641510009765625, + "logits/chosen": -2.6119577884674072, + "logits/rejected": -2.5998666286468506, + "logps/chosen": -0.7399083375930786, + "logps/rejected": -1.0167956352233887, + "loss": 0.4914, + "nll_loss": 0.41224998235702515, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.03699541836977005, + "rewards/margins": 0.013844366185367107, + "rewards/rejected": -0.050839781761169434, "step": 530 }, { "epoch": 0.5663345568956476, - "grad_norm": 1.9397603724841295, - "learning_rate": 8.606629658238705e-06, - "log_odds_chosen": 0.15624158084392548, - "log_odds_ratio": -0.7059566378593445, - "logits/chosen": -3.0063095092773438, - "logits/rejected": -3.0354349613189697, - "logps/chosen": -0.8621616363525391, - "logps/rejected": -0.9609626531600952, - "loss": 0.5526, - "nll_loss": 0.5280291438102722, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.04310808330774307, - "rewards/margins": 0.0049400487914681435, - "rewards/rejected": -0.04804813116788864, + "grad_norm": 1.9884035673972174, + "learning_rate": 3.012320380383546e-06, + "log_odds_chosen": 0.21374063193798065, + "log_odds_ratio": -0.6833196878433228, + "logits/chosen": -2.6167845726013184, + "logits/rejected": -2.599025011062622, + "logps/chosen": -0.7700163125991821, + "logps/rejected": -0.890272319316864, + "loss": 0.5043, + "nll_loss": 0.47903138399124146, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.03850081190466881, + "rewards/margins": 0.006012803874909878, + "rewards/rejected": -0.04451362043619156, "step": 540 }, { "epoch": 0.5768222338751966, - "grad_norm": 1.9970251061131588, - "learning_rate": 8.528028654224417e-06, - "log_odds_chosen": 0.3964000940322876, - "log_odds_ratio": -0.6276581883430481, - "logits/chosen": -3.051056385040283, - "logits/rejected": -3.0628600120544434, - "logps/chosen": -0.8477095365524292, - "logps/rejected": -1.090545415878296, - "loss": 0.5377, - "nll_loss": 0.5382589101791382, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04238547384738922, - "rewards/margins": 0.012141798622906208, - "rewards/rejected": -0.05452727526426315, + "grad_norm": 2.186607185927277, + "learning_rate": 2.9848100289785456e-06, + "log_odds_chosen": 0.45103105902671814, + "log_odds_ratio": -0.6082615852355957, + "logits/chosen": -2.6567091941833496, + "logits/rejected": -2.609574794769287, + "logps/chosen": -0.7585142850875854, + "logps/rejected": -1.0295699834823608, + "loss": 0.4918, + "nll_loss": 0.48958802223205566, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03792571276426315, + "rewards/margins": 0.01355278305709362, + "rewards/rejected": -0.05147849768400192, "step": 550 }, { "epoch": 0.5873099108547457, - "grad_norm": 1.9451374983545444, - "learning_rate": 8.451542547285167e-06, - "log_odds_chosen": 0.24946291744709015, - "log_odds_ratio": -0.6731950044631958, - "logits/chosen": -3.09270977973938, - "logits/rejected": -3.1291451454162598, - "logps/chosen": -0.8785122632980347, - "logps/rejected": -1.0384708642959595, - "loss": 0.5214, - "nll_loss": 0.5020500421524048, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04392561689019203, - "rewards/margins": 0.007997924461960793, - "rewards/rejected": -0.05192355066537857, + "grad_norm": 2.1145358879634872, + "learning_rate": 2.958039891549808e-06, + "log_odds_chosen": 0.2827582359313965, + "log_odds_ratio": -0.6594165563583374, + "logits/chosen": -2.6023669242858887, + "logits/rejected": -2.574957847595215, + "logps/chosen": -0.7867820858955383, + "logps/rejected": -0.9555041193962097, + "loss": 0.4774, + "nll_loss": 0.45714274048805237, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0393391028046608, + "rewards/margins": 0.008436103351414204, + "rewards/rejected": -0.047775208950042725, "step": 560 }, { "epoch": 0.5977975878342947, - "grad_norm": 2.015759366014609, - "learning_rate": 8.37707816583391e-06, - "log_odds_chosen": 0.1689465194940567, - "log_odds_ratio": -0.7204016447067261, - "logits/chosen": -3.082165241241455, - "logits/rejected": -3.113685369491577, - "logps/chosen": -0.8903343081474304, - "logps/rejected": -1.0027625560760498, - "loss": 0.5039, - "nll_loss": 0.5279403924942017, - "rewards/accuracies": 0.5062500238418579, - "rewards/chosen": -0.04451671987771988, - "rewards/margins": 0.0056214118376374245, - "rewards/rejected": -0.05013813450932503, + "grad_norm": 2.3757421806444343, + "learning_rate": 2.9319773580418683e-06, + "log_odds_chosen": 0.2533697485923767, + "log_odds_ratio": -0.6926103830337524, + "logits/chosen": -2.662379264831543, + "logits/rejected": -2.6397509574890137, + "logps/chosen": -0.7862294316291809, + "logps/rejected": -0.9584717750549316, + "loss": 0.463, + "nll_loss": 0.4819509983062744, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.039311472326517105, + "rewards/margins": 0.00861212145537138, + "rewards/rejected": -0.04792358726263046, "step": 570 }, { "epoch": 0.6082852648138437, - "grad_norm": 1.8532059123988396, - "learning_rate": 8.304547985373997e-06, - "log_odds_chosen": 0.27719905972480774, - "log_odds_ratio": -0.6604655385017395, - "logits/chosen": -3.164926528930664, - "logits/rejected": -3.1809298992156982, - "logps/chosen": -0.8681858777999878, - "logps/rejected": -1.0584015846252441, - "loss": 0.5449, - "nll_loss": 0.48173967003822327, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.04340929910540581, - "rewards/margins": 0.009510790929198265, - "rewards/rejected": -0.052920084446668625, + "grad_norm": 2.172213103107974, + "learning_rate": 2.906591794880899e-06, + "log_odds_chosen": 0.3392280340194702, + "log_odds_ratio": -0.6386864185333252, + "logits/chosen": -2.6814630031585693, + "logits/rejected": -2.6795036792755127, + "logps/chosen": -0.7794855833053589, + "logps/rejected": -1.0036094188690186, + "loss": 0.4996, + "nll_loss": 0.4401033818721771, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.038974277675151825, + "rewards/margins": 0.011206192895770073, + "rewards/rejected": -0.05018047243356705, "step": 580 }, { "epoch": 0.6187729417933928, - "grad_norm": 1.9696416884513863, - "learning_rate": 8.233869695926184e-06, - "log_odds_chosen": 0.3565579056739807, - "log_odds_ratio": -0.6653521656990051, - "logits/chosen": -3.1371326446533203, - "logits/rejected": -3.1804890632629395, - "logps/chosen": -0.8285515904426575, - "logps/rejected": -1.060605764389038, - "loss": 0.5115, - "nll_loss": 0.5481864213943481, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.04142758250236511, - "rewards/margins": 0.011602701619267464, - "rewards/rejected": -0.05303028225898743, + "grad_norm": 2.0671922387658377, + "learning_rate": 2.8818543935741638e-06, + "log_odds_chosen": 0.3985132575035095, + "log_odds_ratio": -0.6514524221420288, + "logits/chosen": -2.6682472229003906, + "logits/rejected": -2.679994821548462, + "logps/chosen": -0.7318185567855835, + "logps/rejected": -0.9744182825088501, + "loss": 0.4678, + "nll_loss": 0.49909916520118713, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.03659093379974365, + "rewards/margins": 0.012129982002079487, + "rewards/rejected": -0.048720914870500565, "step": 590 }, { "epoch": 0.6292606187729418, - "grad_norm": 2.0728707870222607, - "learning_rate": 8.164965809277262e-06, - "log_odds_chosen": 0.3636320233345032, - "log_odds_ratio": -0.6437779664993286, - "logits/chosen": -3.155708074569702, - "logits/rejected": -3.155524492263794, - "logps/chosen": -0.8240157961845398, - "logps/rejected": -1.06477952003479, - "loss": 0.5146, - "nll_loss": 0.4843020439147949, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04120079427957535, - "rewards/margins": 0.012038188055157661, - "rewards/rejected": -0.05323898047208786, + "grad_norm": 2.1967713493078604, + "learning_rate": 2.8577380332470414e-06, + "log_odds_chosen": 0.35757365822792053, + "log_odds_ratio": -0.6395149230957031, + "logits/chosen": -2.663159132003784, + "logits/rejected": -2.649722099304199, + "logps/chosen": -0.7385202646255493, + "logps/rejected": -0.9542753100395203, + "loss": 0.4725, + "nll_loss": 0.4449065625667572, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.03692600876092911, + "rewards/margins": 0.010787753388285637, + "rewards/rejected": -0.04771377146244049, "step": 600 }, { "epoch": 0.6292606187729418, - "eval_log_odds_chosen": 0.312126487493515, - "eval_log_odds_ratio": -0.6417948603630066, - "eval_logits/chosen": -3.127530336380005, - "eval_logits/rejected": -3.1324751377105713, - "eval_logps/chosen": -0.8164808750152588, - "eval_logps/rejected": -1.016471028327942, - "eval_loss": 0.5239931344985962, - "eval_nll_loss": 0.4882962703704834, - "eval_rewards/accuracies": 0.6230158805847168, - "eval_rewards/chosen": -0.0408240407705307, - "eval_rewards/margins": 0.00999950896948576, - "eval_rewards/rejected": -0.050823554396629333, - "eval_runtime": 137.2676, - "eval_samples_per_second": 14.526, - "eval_steps_per_second": 0.459, + "eval_log_odds_chosen": 0.35674363374710083, + "eval_log_odds_ratio": -0.631996214389801, + "eval_logits/chosen": -2.647721767425537, + "eval_logits/rejected": -2.6147334575653076, + "eval_logps/chosen": -0.7248181104660034, + "eval_logps/rejected": -0.9394434690475464, + "eval_loss": 0.4794267416000366, + "eval_nll_loss": 0.44346076250076294, + "eval_rewards/accuracies": 0.6349206566810608, + "eval_rewards/chosen": -0.03624090179800987, + "eval_rewards/margins": 0.01073127705603838, + "eval_rewards/rejected": -0.046972181648015976, + "eval_runtime": 137.9534, + "eval_samples_per_second": 14.454, + "eval_steps_per_second": 0.457, "step": 600 }, { "epoch": 0.6397482957524908, - "grad_norm": 2.2204480702078246, - "learning_rate": 8.097763301789162e-06, - "log_odds_chosen": 0.1712610125541687, - "log_odds_ratio": -0.705093502998352, - "logits/chosen": -3.0651237964630127, - "logits/rejected": -3.0982956886291504, - "logps/chosen": -0.8816771507263184, - "logps/rejected": -0.989287257194519, - "loss": 0.526, - "nll_loss": 0.48726779222488403, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.044083863496780396, - "rewards/margins": 0.0053805033676326275, - "rewards/rejected": -0.04946436733007431, + "grad_norm": 2.2292431160793216, + "learning_rate": 2.834217155626206e-06, + "log_odds_chosen": 0.23770160973072052, + "log_odds_ratio": -0.6840949654579163, + "logits/chosen": -2.5699760913848877, + "logits/rejected": -2.5653116703033447, + "logps/chosen": -0.7841805219650269, + "logps/rejected": -0.9241795539855957, + "loss": 0.4832, + "nll_loss": 0.4458464980125427, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.039209023118019104, + "rewards/margins": 0.006999955512583256, + "rewards/rejected": -0.046208981424570084, "step": 610 }, { "epoch": 0.6502359727320398, - "grad_norm": 2.0795066851294, - "learning_rate": 8.03219328902499e-06, - "log_odds_chosen": 0.18011939525604248, - "log_odds_ratio": -0.7075856328010559, - "logits/chosen": -3.093158721923828, - "logits/rejected": -3.1170780658721924, - "logps/chosen": -0.8789434432983398, - "logps/rejected": -1.0122572183609009, - "loss": 0.5293, - "nll_loss": 0.5134457945823669, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.043947167694568634, - "rewards/margins": 0.006665694061666727, - "rewards/rejected": -0.050612859427928925, + "grad_norm": 2.2910730765164247, + "learning_rate": 2.811267651158746e-06, + "log_odds_chosen": 0.21747846901416779, + "log_odds_ratio": -0.6945130825042725, + "logits/chosen": -2.724179744720459, + "logits/rejected": -2.691539764404297, + "logps/chosen": -0.7931413054466248, + "logps/rejected": -0.943394660949707, + "loss": 0.487, + "nll_loss": 0.4727168679237366, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.03965706750750542, + "rewards/margins": 0.007512666285037994, + "rewards/rejected": -0.04716973379254341, "step": 620 }, { "epoch": 0.6607236497115889, - "grad_norm": 2.0001788984831514, - "learning_rate": 7.968190728895958e-06, - "log_odds_chosen": 0.2610745429992676, - "log_odds_ratio": -0.6974207758903503, - "logits/chosen": -3.0472846031188965, - "logits/rejected": -3.0721120834350586, - "logps/chosen": -0.8566058874130249, - "logps/rejected": -1.0223418474197388, - "loss": 0.5372, - "nll_loss": 0.5244878530502319, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.042830295860767365, - "rewards/margins": 0.00828679371625185, - "rewards/rejected": -0.05111708492040634, + "grad_norm": 2.2609308397995616, + "learning_rate": 2.788866755113585e-06, + "log_odds_chosen": 0.29844212532043457, + "log_odds_ratio": -0.690433919429779, + "logits/chosen": -2.718883991241455, + "logits/rejected": -2.7198710441589355, + "logps/chosen": -0.7700183391571045, + "logps/rejected": -0.9475862383842468, + "loss": 0.4893, + "nll_loss": 0.48064035177230835, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.038500916212797165, + "rewards/margins": 0.00887839961796999, + "rewards/rejected": -0.04737931489944458, "step": 630 }, { "epoch": 0.6712113266911379, - "grad_norm": 2.3414302184737332, - "learning_rate": 7.905694150420949e-06, - "log_odds_chosen": 0.30453813076019287, - "log_odds_ratio": -0.6686201095581055, - "logits/chosen": -3.0571064949035645, - "logits/rejected": -3.079134464263916, - "logps/chosen": -0.8609515428543091, - "logps/rejected": -1.0473490953445435, - "loss": 0.5151, - "nll_loss": 0.46057072281837463, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04304756969213486, - "rewards/margins": 0.009319878183305264, - "rewards/rejected": -0.05236745625734329, + "grad_norm": 2.6649009571693107, + "learning_rate": 2.7669929526473316e-06, + "log_odds_chosen": 0.4156903326511383, + "log_odds_ratio": -0.6158550977706909, + "logits/chosen": -2.7182445526123047, + "logits/rejected": -2.6942853927612305, + "logps/chosen": -0.7768423557281494, + "logps/rejected": -1.0251133441925049, + "loss": 0.4711, + "nll_loss": 0.41822823882102966, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03884211927652359, + "rewards/margins": 0.012413550168275833, + "rewards/rejected": -0.051255665719509125, "step": 640 }, { "epoch": 0.6816990036706869, - "grad_norm": 1.9074311662484937, - "learning_rate": 7.844645405527363e-06, - "log_odds_chosen": 0.21438069641590118, - "log_odds_ratio": -0.7022002935409546, - "logits/chosen": -3.058842897415161, - "logits/rejected": -3.0864357948303223, - "logps/chosen": -0.8311389684677124, - "logps/rejected": -0.9654434323310852, - "loss": 0.5332, - "nll_loss": 0.5123748183250427, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.0415569506585598, - "rewards/margins": 0.006715219467878342, - "rewards/rejected": -0.04827217012643814, + "grad_norm": 2.0343884705834268, + "learning_rate": 2.745625891934577e-06, + "log_odds_chosen": 0.23737592995166779, + "log_odds_ratio": -0.6948662996292114, + "logits/chosen": -2.74450421333313, + "logits/rejected": -2.7467565536499023, + "logps/chosen": -0.7428392767906189, + "logps/rejected": -0.8866605758666992, + "loss": 0.4898, + "nll_loss": 0.4688393175601959, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03714196756482124, + "rewards/margins": 0.00719106663018465, + "rewards/rejected": -0.04433303326368332, "step": 650 }, { "epoch": 0.6921866806502359, - "grad_norm": 1.9616180703535884, - "learning_rate": 7.78498944161523e-06, - "log_odds_chosen": 0.3507782816886902, - "log_odds_ratio": -0.641882061958313, - "logits/chosen": -3.0647902488708496, - "logits/rejected": -3.1045496463775635, - "logps/chosen": -0.8823181390762329, - "logps/rejected": -1.1245914697647095, - "loss": 0.5293, - "nll_loss": 0.48711147904396057, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.044115908443927765, - "rewards/margins": 0.012113666161894798, - "rewards/rejected": -0.05622958019375801, + "grad_norm": 2.0637062426142556, + "learning_rate": 2.7247463045653303e-06, + "log_odds_chosen": 0.36518558859825134, + "log_odds_ratio": -0.6426655650138855, + "logits/chosen": -2.7563986778259277, + "logits/rejected": -2.74312424659729, + "logps/chosen": -0.7905346751213074, + "logps/rejected": -1.0196200609207153, + "loss": 0.4859, + "nll_loss": 0.4443667531013489, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03952673822641373, + "rewards/margins": 0.011454259976744652, + "rewards/rejected": -0.05098099634051323, "step": 660 }, { "epoch": 0.702674357629785, - "grad_norm": 2.2401170633783427, - "learning_rate": 7.726674092862559e-06, - "log_odds_chosen": 0.4617346227169037, - "log_odds_ratio": -0.627942681312561, - "logits/chosen": -3.0200469493865967, - "logits/rejected": -3.0557796955108643, - "logps/chosen": -0.8328607678413391, - "logps/rejected": -1.140726923942566, - "loss": 0.5237, - "nll_loss": 0.46908053755760193, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.041643042117357254, - "rewards/margins": 0.015393314883112907, - "rewards/rejected": -0.057036347687244415, + "grad_norm": 1.992995386941069, + "learning_rate": 2.704335932501895e-06, + "log_odds_chosen": 0.490286260843277, + "log_odds_ratio": -0.6087489724159241, + "logits/chosen": -2.72459077835083, + "logits/rejected": -2.7280569076538086, + "logps/chosen": -0.7373065948486328, + "logps/rejected": -1.0489108562469482, + "loss": 0.4831, + "nll_loss": 0.42895203828811646, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.03686532750725746, + "rewards/margins": 0.01558021642267704, + "rewards/rejected": -0.05244554951786995, "step": 670 }, { "epoch": 0.713162034609334, - "grad_norm": 2.00824540701018, - "learning_rate": 7.669649888473705e-06, - "log_odds_chosen": 0.36505717039108276, - "log_odds_ratio": -0.6428455114364624, - "logits/chosen": -3.0360779762268066, - "logits/rejected": -3.044907808303833, - "logps/chosen": -0.8793157339096069, - "logps/rejected": -1.1065771579742432, - "loss": 0.5083, - "nll_loss": 0.4951552450656891, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.04396578669548035, - "rewards/margins": 0.0113630760461092, - "rewards/rejected": -0.0553288571536541, + "grad_norm": 2.8251895935339886, + "learning_rate": 2.6843774609657963e-06, + "log_odds_chosen": 0.3856969177722931, + "log_odds_ratio": -0.6318041086196899, + "logits/chosen": -2.7299182415008545, + "logits/rejected": -2.699131488800049, + "logps/chosen": -0.7913435697555542, + "logps/rejected": -1.0201423168182373, + "loss": 0.4669, + "nll_loss": 0.45303601026535034, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.03956717997789383, + "rewards/margins": 0.011439927853643894, + "rewards/rejected": -0.05100711062550545, "step": 680 }, { "epoch": 0.723649711588883, - "grad_norm": 1.8606652251395144, - "learning_rate": 7.61386987626881e-06, - "log_odds_chosen": 0.2045813500881195, - "log_odds_ratio": -0.7114613056182861, - "logits/chosen": -3.036839723587036, - "logits/rejected": -3.0589654445648193, - "logps/chosen": -0.8661033511161804, - "logps/rejected": -1.014004111289978, - "loss": 0.5313, - "nll_loss": 0.5510386824607849, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.0433051735162735, - "rewards/margins": 0.007395035121589899, - "rewards/rejected": -0.05070021003484726, + "grad_norm": 2.3126283290431457, + "learning_rate": 2.6648544566940834e-06, + "log_odds_chosen": 0.21687667071819305, + "log_odds_ratio": -0.7159269452095032, + "logits/chosen": -2.7354016304016113, + "logits/rejected": -2.722414493560791, + "logps/chosen": -0.7863477468490601, + "logps/rejected": -0.9429599046707153, + "loss": 0.4903, + "nll_loss": 0.5047397613525391, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03931739181280136, + "rewards/margins": 0.007830603048205376, + "rewards/rejected": -0.04714799299836159, "step": 690 }, { "epoch": 0.7341373885684321, - "grad_norm": 2.2895278902082747, - "learning_rate": 7.559289460184545e-06, - "log_odds_chosen": 0.34833860397338867, - "log_odds_ratio": -0.6269202828407288, - "logits/chosen": -3.0252926349639893, - "logits/rejected": -3.068871021270752, - "logps/chosen": -0.8163930177688599, - "logps/rejected": -1.0459128618240356, - "loss": 0.5298, - "nll_loss": 0.5428040623664856, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.04081965237855911, - "rewards/margins": 0.01147598959505558, - "rewards/rejected": -0.05229564383625984, + "grad_norm": 2.323029961728673, + "learning_rate": 2.6457513110645903e-06, + "log_odds_chosen": 0.342260479927063, + "log_odds_ratio": -0.6298097968101501, + "logits/chosen": -2.679320812225342, + "logits/rejected": -2.6582911014556885, + "logps/chosen": -0.7469282746315002, + "logps/rejected": -0.9541714787483215, + "loss": 0.4875, + "nll_loss": 0.4991229474544525, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.03734641522169113, + "rewards/margins": 0.010362156666815281, + "rewards/rejected": -0.04770857095718384, "step": 700 }, { "epoch": 0.7341373885684321, - "eval_log_odds_chosen": 0.3869401812553406, - "eval_log_odds_ratio": -0.6218506097793579, - "eval_logits/chosen": -3.0754599571228027, - "eval_logits/rejected": -3.076083183288574, - "eval_logps/chosen": -0.8267216682434082, - "eval_logps/rejected": -1.0827099084854126, - "eval_loss": 0.5187779068946838, - "eval_nll_loss": 0.4841572344303131, - "eval_rewards/accuracies": 0.6428571343421936, - "eval_rewards/chosen": -0.04133608192205429, - "eval_rewards/margins": 0.012799412943422794, - "eval_rewards/rejected": -0.05413549765944481, - "eval_runtime": 137.1864, - "eval_samples_per_second": 14.535, - "eval_steps_per_second": 0.459, + "eval_log_odds_chosen": 0.417955607175827, + "eval_log_odds_ratio": -0.6158252358436584, + "eval_logits/chosen": -2.7213134765625, + "eval_logits/rejected": -2.691012144088745, + "eval_logps/chosen": -0.7365118861198425, + "eval_logps/rejected": -0.9954525232315063, + "eval_loss": 0.47666841745376587, + "eval_nll_loss": 0.441643089056015, + "eval_rewards/accuracies": 0.6408730149269104, + "eval_rewards/chosen": -0.036825601011514664, + "eval_rewards/margins": 0.01294703409075737, + "eval_rewards/rejected": -0.049772635102272034, + "eval_runtime": 140.8809, + "eval_samples_per_second": 14.154, + "eval_steps_per_second": 0.447, "step": 700 }, { "epoch": 0.7446250655479811, - "grad_norm": 1.958829045282282, - "learning_rate": 7.505866250408016e-06, - "log_odds_chosen": 0.2794094383716583, - "log_odds_ratio": -0.6572638750076294, - "logits/chosen": -3.1184074878692627, - "logits/rejected": -3.1369974613189697, - "logps/chosen": -0.8444921374320984, - "logps/rejected": -1.0439577102661133, - "loss": 0.5242, - "nll_loss": 0.47964978218078613, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04222460836172104, - "rewards/margins": 0.00997327920049429, - "rewards/rejected": -0.052197881042957306, + "grad_norm": 2.2253143227977055, + "learning_rate": 2.627053187642805e-06, + "log_odds_chosen": 0.31003057956695557, + "log_odds_ratio": -0.6495457887649536, + "logits/chosen": -2.7463955879211426, + "logits/rejected": -2.7364678382873535, + "logps/chosen": -0.7539780139923096, + "logps/rejected": -0.9565252065658569, + "loss": 0.4819, + "nll_loss": 0.4394974708557129, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.0376988984644413, + "rewards/margins": 0.010127360001206398, + "rewards/rejected": -0.047826264053583145, "step": 710 }, { "epoch": 0.7551127425275301, - "grad_norm": 1.8049248182957538, - "learning_rate": 7.4535599249993e-06, - "log_odds_chosen": 0.36963027715682983, - "log_odds_ratio": -0.6443501710891724, - "logits/chosen": -3.075653076171875, - "logits/rejected": -3.0980098247528076, - "logps/chosen": -0.7987631559371948, - "logps/rejected": -1.03029465675354, - "loss": 0.5308, - "nll_loss": 0.4633590281009674, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.03993815928697586, - "rewards/margins": 0.011576572433114052, - "rewards/rejected": -0.051514726132154465, + "grad_norm": 1.9919741933282713, + "learning_rate": 2.6087459737497545e-06, + "log_odds_chosen": 0.40133896470069885, + "log_odds_ratio": -0.6439169645309448, + "logits/chosen": -2.7264726161956787, + "logits/rejected": -2.7285008430480957, + "logps/chosen": -0.7132266759872437, + "logps/rejected": -0.9523170590400696, + "loss": 0.4904, + "nll_loss": 0.42442673444747925, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.03566133230924606, + "rewards/margins": 0.011954517103731632, + "rewards/rejected": -0.04761584475636482, "step": 720 }, { "epoch": 0.7656004195070791, - "grad_norm": 2.1907119668628807, - "learning_rate": 7.402332101976053e-06, - "log_odds_chosen": 0.1018507108092308, - "log_odds_ratio": -0.7229408621788025, - "logits/chosen": -3.084719181060791, - "logits/rejected": -3.0846333503723145, - "logps/chosen": -0.8332414627075195, - "logps/rejected": -0.8869687914848328, - "loss": 0.5377, - "nll_loss": 0.5031158328056335, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.041662074625492096, - "rewards/margins": 0.00268636760301888, - "rewards/rejected": -0.04434844106435776, + "grad_norm": 2.5524316814232657, + "learning_rate": 2.5908162356916185e-06, + "log_odds_chosen": 0.1571163833141327, + "log_odds_ratio": -0.7166911363601685, + "logits/chosen": -2.805894613265991, + "logits/rejected": -2.7996468544006348, + "logps/chosen": -0.7540133595466614, + "logps/rejected": -0.8382581472396851, + "loss": 0.4937, + "nll_loss": 0.4598192572593689, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.03770066425204277, + "rewards/margins": 0.004212243482470512, + "rewards/rejected": -0.041912905871868134, "step": 730 }, { "epoch": 0.7760880964866282, - "grad_norm": 2.050092986168091, - "learning_rate": 7.352146220938079e-06, - "log_odds_chosen": 0.3393878936767578, - "log_odds_ratio": -0.6246740221977234, - "logits/chosen": -3.119809627532959, - "logits/rejected": -3.132826328277588, - "logps/chosen": -0.804786205291748, - "logps/rejected": -1.0171911716461182, - "loss": 0.5308, - "nll_loss": 0.4794273376464844, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.040239304304122925, - "rewards/margins": 0.010620243847370148, - "rewards/rejected": -0.05085955187678337, + "grad_norm": 2.1353118528501684, + "learning_rate": 2.5732511773283276e-06, + "log_odds_chosen": 0.35292255878448486, + "log_odds_ratio": -0.625573992729187, + "logits/chosen": -2.8535656929016113, + "logits/rejected": -2.8482494354248047, + "logps/chosen": -0.7254922389984131, + "logps/rejected": -0.9415895342826843, + "loss": 0.4903, + "nll_loss": 0.4391508996486664, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.036274611949920654, + "rewards/margins": 0.010804859921336174, + "rewards/rejected": -0.04707947373390198, "step": 740 }, { "epoch": 0.7865757734661772, - "grad_norm": 2.0193892114327556, - "learning_rate": 7.3029674334022146e-06, - "log_odds_chosen": 0.2425309419631958, - "log_odds_ratio": -0.6716917753219604, - "logits/chosen": -3.093583106994629, - "logits/rejected": -3.114816188812256, - "logps/chosen": -0.8740803599357605, - "logps/rejected": -1.0157320499420166, - "loss": 0.5427, - "nll_loss": 0.4982066750526428, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04370402172207832, - "rewards/margins": 0.007082589901983738, - "rewards/rejected": -0.05078660696744919, + "grad_norm": 2.076299852744321, + "learning_rate": 2.556038601690775e-06, + "log_odds_chosen": 0.27716293931007385, + "log_odds_ratio": -0.6662799119949341, + "logits/chosen": -2.8263370990753174, + "logits/rejected": -2.8200631141662598, + "logps/chosen": -0.7884274125099182, + "logps/rejected": -0.9425498843193054, + "loss": 0.5033, + "nll_loss": 0.460857093334198, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.03942137211561203, + "rewards/margins": 0.00770611921325326, + "rewards/rejected": -0.04712748900055885, "step": 750 }, { "epoch": 0.7970634504457262, - "grad_norm": 1.891204637475333, - "learning_rate": 7.254762501100117e-06, - "log_odds_chosen": 0.2664291262626648, - "log_odds_ratio": -0.6672528386116028, - "logits/chosen": -3.0630593299865723, - "logits/rejected": -3.0695788860321045, - "logps/chosen": -0.8163594007492065, - "logps/rejected": -0.993925929069519, - "loss": 0.5114, - "nll_loss": 0.40486717224121094, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.040817975997924805, - "rewards/margins": 0.00887832697480917, - "rewards/rejected": -0.049696292728185654, + "grad_norm": 2.144911846283459, + "learning_rate": 2.539166875385041e-06, + "log_odds_chosen": 0.28878992795944214, + "log_odds_ratio": -0.6523956060409546, + "logits/chosen": -2.827876567840576, + "logits/rejected": -2.818580389022827, + "logps/chosen": -0.7346550226211548, + "logps/rejected": -0.9111967086791992, + "loss": 0.4719, + "nll_loss": 0.3698672354221344, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.0367327556014061, + "rewards/margins": 0.008827080950140953, + "rewards/rejected": -0.0455598309636116, "step": 760 }, { "epoch": 0.8075511274252754, - "grad_norm": 2.0675479903273914, - "learning_rate": 7.207499701564472e-06, - "log_odds_chosen": 0.23201966285705566, - "log_odds_ratio": -0.6995107531547546, - "logits/chosen": -3.027050018310547, - "logits/rejected": -3.0489039421081543, - "logps/chosen": -0.8810374140739441, - "logps/rejected": -1.0541043281555176, - "loss": 0.5343, - "nll_loss": 0.5017890334129333, - "rewards/accuracies": 0.5062500238418579, - "rewards/chosen": -0.0440518744289875, - "rewards/margins": 0.008653342723846436, - "rewards/rejected": -0.05270521715283394, + "grad_norm": 2.457074288822972, + "learning_rate": 2.522624895547565e-06, + "log_odds_chosen": 0.2632114589214325, + "log_odds_ratio": -0.6844597458839417, + "logits/chosen": -2.785381317138672, + "logits/rejected": -2.7871222496032715, + "logps/chosen": -0.796169102191925, + "logps/rejected": -0.9764283895492554, + "loss": 0.4935, + "nll_loss": 0.4608798921108246, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.039808452129364014, + "rewards/margins": 0.009012967348098755, + "rewards/rejected": -0.048821426928043365, "step": 770 }, { "epoch": 0.8180388044048243, - "grad_norm": 1.9571785710156353, - "learning_rate": 7.1611487403943295e-06, - "log_odds_chosen": 0.23842506110668182, - "log_odds_ratio": -0.672247052192688, - "logits/chosen": -3.062586545944214, - "logits/rejected": -3.0935113430023193, - "logps/chosen": -0.8818261027336121, - "logps/rejected": -1.0167505741119385, - "loss": 0.5467, - "nll_loss": 0.5480509996414185, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04409131035208702, - "rewards/margins": 0.006746229715645313, - "rewards/rejected": -0.05083753541111946, + "grad_norm": 2.1250851855347417, + "learning_rate": 2.506402059138015e-06, + "log_odds_chosen": 0.2769099771976471, + "log_odds_ratio": -0.6522020101547241, + "logits/chosen": -2.8049657344818115, + "logits/rejected": -2.8198862075805664, + "logps/chosen": -0.7881239056587219, + "logps/rejected": -0.9357802271842957, + "loss": 0.5049, + "nll_loss": 0.5033601522445679, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.039406199008226395, + "rewards/margins": 0.0073828138411045074, + "rewards/rejected": -0.0467890128493309, "step": 780 }, { "epoch": 0.8285264813843733, - "grad_norm": 1.8565884413084413, - "learning_rate": 7.115680669648201e-06, - "log_odds_chosen": 0.32895228266716003, - "log_odds_ratio": -0.6478875875473022, - "logits/chosen": -3.1025116443634033, - "logits/rejected": -3.1219050884246826, - "logps/chosen": -0.8189374804496765, - "logps/rejected": -1.0338833332061768, - "loss": 0.5049, - "nll_loss": 0.44281667470932007, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.040946874767541885, - "rewards/margins": 0.010747292079031467, - "rewards/rejected": -0.05169416218996048, + "grad_norm": 2.1157883450641966, + "learning_rate": 2.49048823437687e-06, + "log_odds_chosen": 0.4010138511657715, + "log_odds_ratio": -0.6229840517044067, + "logits/chosen": -2.8338706493377686, + "logits/rejected": -2.8394291400909424, + "logps/chosen": -0.7245864272117615, + "logps/rejected": -0.9661226272583008, + "loss": 0.4661, + "nll_loss": 0.4065842032432556, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.03622932359576225, + "rewards/margins": 0.01207680907100439, + "rewards/rejected": -0.04830613359808922, "step": 790 }, { "epoch": 0.8390141583639223, - "grad_norm": 2.106485781152954, - "learning_rate": 7.0710678118654756e-06, - "log_odds_chosen": 0.4608131945133209, - "log_odds_ratio": -0.5961465835571289, - "logits/chosen": -3.092484951019287, - "logits/rejected": -3.090536117553711, - "logps/chosen": -0.7798897624015808, - "logps/rejected": -1.0744028091430664, - "loss": 0.5181, - "nll_loss": 0.4202440679073334, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": -0.03899449110031128, - "rewards/margins": 0.014725650660693645, - "rewards/rejected": -0.0537201389670372, + "grad_norm": 2.3895076758034515, + "learning_rate": 2.474873734152916e-06, + "log_odds_chosen": 0.48685508966445923, + "log_odds_ratio": -0.5867618918418884, + "logits/chosen": -2.813389301300049, + "logits/rejected": -2.7975525856018066, + "logps/chosen": -0.6979315876960754, + "logps/rejected": -1.0023411512374878, + "loss": 0.4796, + "nll_loss": 0.3860110640525818, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.03489658236503601, + "rewards/margins": 0.015220480971038342, + "rewards/rejected": -0.050117067992687225, "step": 800 }, { "epoch": 0.8390141583639223, - "eval_log_odds_chosen": 0.35056135058403015, - "eval_log_odds_ratio": -0.6322371363639832, - "eval_logits/chosen": -3.139373302459717, - "eval_logits/rejected": -3.1382317543029785, - "eval_logps/chosen": -0.8198128342628479, - "eval_logps/rejected": -1.0474979877471924, - "eval_loss": 0.5140993595123291, - "eval_nll_loss": 0.4803001582622528, - "eval_rewards/accuracies": 0.6329365372657776, - "eval_rewards/chosen": -0.040990639477968216, - "eval_rewards/margins": 0.011384249664843082, - "eval_rewards/rejected": -0.05237489193677902, - "eval_runtime": 136.2293, - "eval_samples_per_second": 14.637, - "eval_steps_per_second": 0.462, + "eval_log_odds_chosen": 0.4362943768501282, + "eval_log_odds_ratio": -0.6168639063835144, + "eval_logits/chosen": -2.8114309310913086, + "eval_logits/rejected": -2.791295289993286, + "eval_logps/chosen": -0.7415919303894043, + "eval_logps/rejected": -1.016213297843933, + "eval_loss": 0.4739992916584015, + "eval_nll_loss": 0.4396199584007263, + "eval_rewards/accuracies": 0.6507936716079712, + "eval_rewards/chosen": -0.037079595029354095, + "eval_rewards/margins": 0.013731070794165134, + "eval_rewards/rejected": -0.050810668617486954, + "eval_runtime": 137.8725, + "eval_samples_per_second": 14.463, + "eval_steps_per_second": 0.457, "step": 800 }, { "epoch": 0.8495018353434715, - "grad_norm": 1.919736952774634, - "learning_rate": 7.027283689263066e-06, - "log_odds_chosen": 0.3574589788913727, - "log_odds_ratio": -0.6265517473220825, - "logits/chosen": -3.0922906398773193, - "logits/rejected": -3.093270778656006, - "logps/chosen": -0.8058309555053711, - "logps/rejected": -1.0188381671905518, - "loss": 0.5132, - "nll_loss": 0.4754185676574707, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04029155150055885, - "rewards/margins": 0.010650361888110638, - "rewards/rejected": -0.050941914319992065, + "grad_norm": 2.2171962411607398, + "learning_rate": 2.459549291242073e-06, + "log_odds_chosen": 0.4064277708530426, + "log_odds_ratio": -0.6227105259895325, + "logits/chosen": -2.8798890113830566, + "logits/rejected": -2.8490796089172363, + "logps/chosen": -0.729169487953186, + "logps/rejected": -0.9680086970329285, + "loss": 0.4744, + "nll_loss": 0.4338308870792389, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.03645847737789154, + "rewards/margins": 0.011941960081458092, + "rewards/rejected": -0.04840043932199478, "step": 810 }, { "epoch": 0.8599895123230205, - "grad_norm": 2.3619475771455214, - "learning_rate": 6.984302957695783e-06, - "log_odds_chosen": 0.2932414412498474, - "log_odds_ratio": -0.6586158275604248, - "logits/chosen": -3.0357770919799805, - "logits/rejected": -3.0360379219055176, - "logps/chosen": -0.842557430267334, - "logps/rejected": -1.0188366174697876, - "loss": 0.505, - "nll_loss": 0.4280059337615967, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04212787002325058, - "rewards/margins": 0.008813952095806599, - "rewards/rejected": -0.0509418249130249, + "grad_norm": 2.607409368726623, + "learning_rate": 2.4445060351935238e-06, + "log_odds_chosen": 0.3091586232185364, + "log_odds_ratio": -0.6474903225898743, + "logits/chosen": -2.820725679397583, + "logits/rejected": -2.804964303970337, + "logps/chosen": -0.7581018805503845, + "logps/rejected": -0.9343080520629883, + "loss": 0.4661, + "nll_loss": 0.3911210894584656, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.03790510073304176, + "rewards/margins": 0.00881030224263668, + "rewards/rejected": -0.046715401113033295, "step": 820 }, { "epoch": 0.8704771893025695, - "grad_norm": 2.3824306185771267, - "learning_rate": 6.942101345006233e-06, - "log_odds_chosen": 0.2479257881641388, - "log_odds_ratio": -0.702430248260498, - "logits/chosen": -3.008411407470703, - "logits/rejected": -3.05663800239563, - "logps/chosen": -0.853378415107727, - "logps/rejected": -1.0239073038101196, - "loss": 0.5248, - "nll_loss": 0.4657117426395416, - "rewards/accuracies": 0.5062500238418579, - "rewards/chosen": -0.04266892373561859, - "rewards/margins": 0.00852644257247448, - "rewards/rejected": -0.05119536444544792, + "grad_norm": 2.6267861444652034, + "learning_rate": 2.4297354707521817e-06, + "log_odds_chosen": 0.21734324097633362, + "log_odds_ratio": -0.7081775069236755, + "logits/chosen": -2.805722236633301, + "logits/rejected": -2.8377511501312256, + "logps/chosen": -0.777400553226471, + "logps/rejected": -0.915818989276886, + "loss": 0.4873, + "nll_loss": 0.4305228292942047, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.03887002915143967, + "rewards/margins": 0.0069209253415465355, + "rewards/rejected": -0.04579095169901848, "step": 830 }, { "epoch": 0.8809648662821186, - "grad_norm": 1.9624325890421999, - "learning_rate": 6.900655593423542e-06, - "log_odds_chosen": 0.2082471400499344, - "log_odds_ratio": -0.6889498233795166, - "logits/chosen": -3.040546178817749, - "logits/rejected": -3.0660147666931152, - "logps/chosen": -0.8756462931632996, - "logps/rejected": -1.0124717950820923, - "loss": 0.5137, - "nll_loss": 0.4855361580848694, - "rewards/accuracies": 0.53125, - "rewards/chosen": -0.043782319873571396, - "rewards/margins": 0.006841268390417099, - "rewards/rejected": -0.050623588263988495, + "grad_norm": 2.1614161917289363, + "learning_rate": 2.4152294576982395e-06, + "log_odds_chosen": 0.21988508105278015, + "log_odds_ratio": -0.6872502565383911, + "logits/chosen": -2.8258466720581055, + "logits/rejected": -2.8268680572509766, + "logps/chosen": -0.7874829769134521, + "logps/rejected": -0.9251054525375366, + "loss": 0.4733, + "nll_loss": 0.4440709054470062, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.03937415033578873, + "rewards/margins": 0.006881123874336481, + "rewards/rejected": -0.04625527560710907, "step": 840 }, { "epoch": 0.8914525432616676, - "grad_norm": 2.0144554917595756, - "learning_rate": 6.859943405700353e-06, - "log_odds_chosen": 0.3205421566963196, - "log_odds_ratio": -0.6371484994888306, - "logits/chosen": -3.054384231567383, - "logits/rejected": -3.0986409187316895, - "logps/chosen": -0.8319618105888367, - "logps/rejected": -1.0313116312026978, - "loss": 0.5044, - "nll_loss": 0.4881317615509033, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.041598085314035416, - "rewards/margins": 0.009967491030693054, - "rewards/rejected": -0.05156558007001877, + "grad_norm": 2.2102319814571074, + "learning_rate": 2.4009801919951233e-06, + "log_odds_chosen": 0.3129335641860962, + "log_odds_ratio": -0.6348214149475098, + "logits/chosen": -2.8568568229675293, + "logits/rejected": -2.865201473236084, + "logps/chosen": -0.749543309211731, + "logps/rejected": -0.9329560399055481, + "loss": 0.466, + "nll_loss": 0.4490523934364319, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.03747716546058655, + "rewards/margins": 0.009170634672045708, + "rewards/rejected": -0.046647801995277405, "step": 850 }, { "epoch": 0.9019402202412166, - "grad_norm": 1.9341957217840544, - "learning_rate": 6.819943394704736e-06, - "log_odds_chosen": 0.26728707551956177, - "log_odds_ratio": -0.6747015714645386, - "logits/chosen": -3.0936527252197266, - "logits/rejected": -3.1073575019836426, - "logps/chosen": -0.8353049159049988, - "logps/rejected": -1.0224361419677734, - "loss": 0.5278, - "nll_loss": 0.4731883108615875, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.0417652502655983, - "rewards/margins": 0.009356559254229069, - "rewards/rejected": -0.05112180858850479, + "grad_norm": 2.082847476776939, + "learning_rate": 2.3869801881466573e-06, + "log_odds_chosen": 0.2860751152038574, + "log_odds_ratio": -0.6700129508972168, + "logits/chosen": -2.825407028198242, + "logits/rejected": -2.8392233848571777, + "logps/chosen": -0.7431017756462097, + "logps/rejected": -0.9103603363037109, + "loss": 0.4884, + "nll_loss": 0.4357692301273346, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.037155088037252426, + "rewards/margins": 0.008362922817468643, + "rewards/rejected": -0.045518018305301666, "step": 860 }, { "epoch": 0.9124278972207656, - "grad_norm": 5.30319924106792, - "learning_rate": 6.780635036208105e-06, - "log_odds_chosen": 0.30106544494628906, - "log_odds_ratio": -0.6683878898620605, - "logits/chosen": -3.097151279449463, - "logits/rejected": -3.1499500274658203, - "logps/chosen": -0.867012619972229, - "logps/rejected": -1.0790386199951172, - "loss": 0.4933, - "nll_loss": 0.48347124457359314, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.04335063695907593, - "rewards/margins": 0.010601297952234745, - "rewards/rejected": -0.0539519302546978, + "grad_norm": 2.188429034443825, + "learning_rate": 2.3732222626728365e-06, + "log_odds_chosen": 0.3270949423313141, + "log_odds_ratio": -0.6543049812316895, + "logits/chosen": -2.8709769248962402, + "logits/rejected": -2.888324022293091, + "logps/chosen": -0.7763268947601318, + "logps/rejected": -0.9964207410812378, + "loss": 0.454, + "nll_loss": 0.4407920837402344, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.03881634771823883, + "rewards/margins": 0.011004697531461716, + "rewards/rejected": -0.04982104152441025, "step": 870 }, { "epoch": 0.9229155742003147, - "grad_norm": 1.6208302885778367, - "learning_rate": 6.741998624632421e-06, - "log_odds_chosen": 0.29186171293258667, - "log_odds_ratio": -0.6591932773590088, - "logits/chosen": -3.15583872795105, - "logits/rejected": -3.168064594268799, - "logps/chosen": -0.8187226057052612, - "logps/rejected": -1.0049909353256226, - "loss": 0.4887, - "nll_loss": 0.4384452700614929, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.04093613475561142, - "rewards/margins": 0.009313413873314857, - "rewards/rejected": -0.05024954676628113, + "grad_norm": 1.8451620085670009, + "learning_rate": 2.359699518621347e-06, + "log_odds_chosen": 0.3485734164714813, + "log_odds_ratio": -0.6351412534713745, + "logits/chosen": -2.9025185108184814, + "logits/rejected": -2.8809902667999268, + "logps/chosen": -0.7233132719993591, + "logps/rejected": -0.9310896992683411, + "loss": 0.4524, + "nll_loss": 0.4024543762207031, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.03616566210985184, + "rewards/margins": 0.010388821363449097, + "rewards/rejected": -0.046554479748010635, "step": 880 }, { "epoch": 0.9334032511798637, - "grad_norm": 1.7707391073712173, - "learning_rate": 6.70401523153991e-06, - "log_odds_chosen": 0.33703380823135376, - "log_odds_ratio": -0.6459982991218567, - "logits/chosen": -3.1340742111206055, - "logits/rejected": -3.157071590423584, - "logps/chosen": -0.8063561320304871, - "logps/rejected": -0.9982324838638306, - "loss": 0.4931, - "nll_loss": 0.4631246030330658, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.040317803621292114, - "rewards/margins": 0.009593818336725235, - "rewards/rejected": -0.04991162568330765, + "grad_norm": 1.9306573871485972, + "learning_rate": 2.3464053310389682e-06, + "log_odds_chosen": 0.3904303014278412, + "log_odds_ratio": -0.623832106590271, + "logits/chosen": -2.84079909324646, + "logits/rejected": -2.8426525592803955, + "logps/chosen": -0.7186557650566101, + "logps/rejected": -0.9262601137161255, + "loss": 0.4565, + "nll_loss": 0.42616167664527893, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.035932786762714386, + "rewards/margins": 0.010380217805504799, + "rewards/rejected": -0.046313002705574036, "step": 890 }, { "epoch": 0.9438909281594127, - "grad_norm": 2.341682439233393, - "learning_rate": 6.666666666666667e-06, - "log_odds_chosen": 0.26426905393600464, - "log_odds_ratio": -0.6637164354324341, - "logits/chosen": -3.1100411415100098, - "logits/rejected": -3.130826473236084, - "logps/chosen": -0.7806347012519836, - "logps/rejected": -0.9385608434677124, - "loss": 0.5239, - "nll_loss": 0.4659123420715332, + "grad_norm": 2.157911532280212, + "learning_rate": 2.333333333333333e-06, + "log_odds_chosen": 0.3039458692073822, + "log_odds_ratio": -0.6423442959785461, + "logits/chosen": -2.896359920501709, + "logits/rejected": -2.9049692153930664, + "logps/chosen": -0.6981052756309509, + "logps/rejected": -0.8672422170639038, + "loss": 0.4851, + "nll_loss": 0.428159236907959, "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.039031732827425, - "rewards/margins": 0.00789631437510252, - "rewards/rejected": -0.0469280444085598, + "rewards/chosen": -0.03490526229143143, + "rewards/margins": 0.008456850424408913, + "rewards/rejected": -0.04336211457848549, "step": 900 }, { "epoch": 0.9438909281594127, - "eval_log_odds_chosen": 0.32679569721221924, - "eval_log_odds_ratio": -0.6327584385871887, - "eval_logits/chosen": -3.117077112197876, - "eval_logits/rejected": -3.119086742401123, - "eval_logps/chosen": -0.8044511079788208, - "eval_logps/rejected": -1.0129274129867554, - "eval_loss": 0.5086367726325989, - "eval_nll_loss": 0.4747697710990906, - "eval_rewards/accuracies": 0.6309523582458496, - "eval_rewards/chosen": -0.04022255912423134, - "eval_rewards/margins": 0.010423817671835423, - "eval_rewards/rejected": -0.05064636468887329, - "eval_runtime": 137.5576, - "eval_samples_per_second": 14.496, - "eval_steps_per_second": 0.458, + "eval_log_odds_chosen": 0.36685651540756226, + "eval_log_odds_ratio": -0.6244728565216064, + "eval_logits/chosen": -2.969223976135254, + "eval_logits/rejected": -2.9542508125305176, + "eval_logps/chosen": -0.7142534852027893, + "eval_logps/rejected": -0.9323597550392151, + "eval_loss": 0.47141149640083313, + "eval_nll_loss": 0.4360823631286621, + "eval_rewards/accuracies": 0.6527777910232544, + "eval_rewards/chosen": -0.035712677985429764, + "eval_rewards/margins": 0.01090531051158905, + "eval_rewards/rejected": -0.046617984771728516, + "eval_runtime": 138.0948, + "eval_samples_per_second": 14.439, + "eval_steps_per_second": 0.456, "step": 900 }, { "epoch": 0.9543786051389617, - "grad_norm": 2.0533389896159213, - "learning_rate": 6.629935441317959e-06, - "log_odds_chosen": 0.4754648208618164, - "log_odds_ratio": -0.6232188940048218, - "logits/chosen": -3.073176622390747, - "logits/rejected": -3.084963321685791, - "logps/chosen": -0.828788161277771, - "logps/rejected": -1.1443804502487183, - "loss": 0.5142, - "nll_loss": 0.46652156114578247, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.04143941029906273, - "rewards/margins": 0.015779614448547363, - "rewards/rejected": -0.05721902847290039, + "grad_norm": 2.4004822961845957, + "learning_rate": 2.3204774044612855e-06, + "log_odds_chosen": 0.4948676526546478, + "log_odds_ratio": -0.626745343208313, + "logits/chosen": -2.963355302810669, + "logits/rejected": -2.9515814781188965, + "logps/chosen": -0.7483548521995544, + "logps/rejected": -1.0602718591690063, + "loss": 0.4776, + "nll_loss": 0.42798590660095215, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.03741774708032608, + "rewards/margins": 0.015595847740769386, + "rewards/rejected": -0.05301359295845032, "step": 910 }, { "epoch": 0.9648662821185108, - "grad_norm": 2.138448059862142, - "learning_rate": 6.593804733957872e-06, - "log_odds_chosen": 0.32768282294273376, - "log_odds_ratio": -0.6431117057800293, - "logits/chosen": -3.038576364517212, - "logits/rejected": -3.061370372772217, - "logps/chosen": -0.7864677906036377, - "logps/rejected": -0.9946994781494141, - "loss": 0.4836, - "nll_loss": 0.43025264143943787, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03932339325547218, - "rewards/margins": 0.010411588475108147, - "rewards/rejected": -0.04973498359322548, + "grad_norm": 2.154391749062073, + "learning_rate": 2.3078316568852547e-06, + "log_odds_chosen": 0.3418871760368347, + "log_odds_ratio": -0.6459903717041016, + "logits/chosen": -2.8877079486846924, + "logits/rejected": -2.9023048877716064, + "logps/chosen": -0.7208271622657776, + "logps/rejected": -0.9329261779785156, + "loss": 0.4496, + "nll_loss": 0.39838844537734985, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.03604135662317276, + "rewards/margins": 0.010604949668049812, + "rewards/rejected": -0.04664631187915802, "step": 920 }, { "epoch": 0.9753539590980598, - "grad_norm": 2.1602863053901413, - "learning_rate": 6.55825835783953e-06, - "log_odds_chosen": 0.2050061970949173, - "log_odds_ratio": -0.6868597269058228, - "logits/chosen": -3.0544333457946777, - "logits/rejected": -3.066739797592163, - "logps/chosen": -0.8742432594299316, - "logps/rejected": -1.0194706916809082, - "loss": 0.5136, - "nll_loss": 0.5241981744766235, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04371216148138046, - "rewards/margins": 0.007261371705681086, - "rewards/rejected": -0.05097353458404541, + "grad_norm": 2.4150467379552776, + "learning_rate": 2.2953904252438353e-06, + "log_odds_chosen": 0.31212860345840454, + "log_odds_ratio": -0.6628017425537109, + "logits/chosen": -2.9404473304748535, + "logits/rejected": -2.935260772705078, + "logps/chosen": -0.7885305285453796, + "logps/rejected": -1.0043061971664429, + "loss": 0.4752, + "nll_loss": 0.48344022035598755, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0394265279173851, + "rewards/margins": 0.010788780637085438, + "rewards/rejected": -0.05021531134843826, "step": 930 }, { "epoch": 0.9858416360776088, - "grad_norm": 1.9215491222233851, - "learning_rate": 6.523280730534423e-06, - "log_odds_chosen": 0.23041269183158875, - "log_odds_ratio": -0.6992384195327759, - "logits/chosen": -3.0867247581481934, - "logits/rejected": -3.0779662132263184, - "logps/chosen": -0.7768861651420593, - "logps/rejected": -0.9184977412223816, - "loss": 0.5102, - "nll_loss": 0.4776674211025238, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.038844309747219086, - "rewards/margins": 0.0070805782452225685, - "rewards/rejected": -0.045924894511699677, + "grad_norm": 2.2491855597526786, + "learning_rate": 2.2831482556870475e-06, + "log_odds_chosen": 0.2697109580039978, + "log_odds_ratio": -0.6924097537994385, + "logits/chosen": -2.9477505683898926, + "logits/rejected": -2.9367494583129883, + "logps/chosen": -0.7188832759857178, + "logps/rejected": -0.8695234060287476, + "loss": 0.4739, + "nll_loss": 0.44516521692276, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.03594416007399559, + "rewards/margins": 0.007532012648880482, + "rewards/rejected": -0.0434761717915535, "step": 940 }, { "epoch": 0.9963293130571579, - "grad_norm": 2.1983436102574547, - "learning_rate": 6.488856845230502e-06, - "log_odds_chosen": 0.25244003534317017, - "log_odds_ratio": -0.6911928653717041, - "logits/chosen": -3.0215468406677246, - "logits/rejected": -3.0374438762664795, - "logps/chosen": -0.8648554682731628, - "logps/rejected": -1.0236364603042603, - "loss": 0.5385, - "nll_loss": 0.5036488175392151, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04324277862906456, - "rewards/margins": 0.00793905183672905, - "rewards/rejected": -0.05118182301521301, + "grad_norm": 2.438616188075854, + "learning_rate": 2.2710998958306758e-06, + "log_odds_chosen": 0.26511335372924805, + "log_odds_ratio": -0.6899660229682922, + "logits/chosen": -2.9427490234375, + "logits/rejected": -2.945517063140869, + "logps/chosen": -0.7803043127059937, + "logps/rejected": -0.9409860372543335, + "loss": 0.4993, + "nll_loss": 0.4652082026004791, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.03901521861553192, + "rewards/margins": 0.008034082129597664, + "rewards/rejected": -0.047049302607774734, "step": 950 }, { - "epoch": 1.0068169900367068, - "grad_norm": 2.2724469008271773, - "learning_rate": 6.4549722436790284e-06, - "log_odds_chosen": 1.0400245189666748, - "log_odds_ratio": -0.42517581582069397, - "logits/chosen": -3.0371384620666504, - "logits/rejected": -3.0435400009155273, - "logps/chosen": -0.5974615812301636, - "logps/rejected": -1.1842448711395264, - "loss": 0.3929, - "nll_loss": 0.40045398473739624, - "rewards/accuracies": 0.793749988079071, - "rewards/chosen": -0.02987307868897915, - "rewards/margins": 0.02933916449546814, - "rewards/rejected": -0.05921224504709244, - "step": 960 - }, - { - "epoch": 1.017304667016256, - "grad_norm": 2.0168885022396372, - "learning_rate": 6.421612990679356e-06, - "log_odds_chosen": 1.6284434795379639, - "log_odds_ratio": -0.2502659857273102, - "logits/chosen": -3.080873727798462, - "logits/rejected": -3.070159912109375, - "logps/chosen": -0.4285094141960144, - "logps/rejected": -1.2745321989059448, - "loss": 0.2923, - "nll_loss": 0.28497669100761414, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.02142546884715557, - "rewards/margins": 0.04230114072561264, - "rewards/rejected": -0.06372661143541336, - "step": 970 - }, - { - "epoch": 1.027792343995805, - "grad_norm": 1.9662869053425782, - "learning_rate": 6.3887656499994e-06, - "log_odds_chosen": 1.8482691049575806, - "log_odds_ratio": -0.21383436024188995, - "logits/chosen": -3.071471929550171, - "logits/rejected": -3.079923391342163, - "logps/chosen": -0.43078216910362244, - "logps/rejected": -1.4107215404510498, - "loss": 0.3019, - "nll_loss": 0.3140898644924164, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.021539105102419853, - "rewards/margins": 0.04899696633219719, - "rewards/rejected": -0.0705360695719719, - "step": 980 - }, - { - "epoch": 1.038280020975354, - "grad_norm": 1.9845582869348006, - "learning_rate": 6.356417261637282e-06, - "log_odds_chosen": 1.6627075672149658, - "log_odds_ratio": -0.2610566318035126, - "logits/chosen": -2.9875268936157227, - "logits/rejected": -2.9876785278320312, - "logps/chosen": -0.4378105103969574, - "logps/rejected": -1.3178083896636963, - "loss": 0.296, - "nll_loss": 0.27773916721343994, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.02189052477478981, - "rewards/margins": 0.04399988800287247, - "rewards/rejected": -0.06589041650295258, - "step": 990 - }, - { - "epoch": 1.048767697954903, - "grad_norm": 2.0942478813902783, - "learning_rate": 6.324555320336759e-06, - "log_odds_chosen": 1.9041988849639893, - "log_odds_ratio": -0.20684988796710968, - "logits/chosen": -2.9869093894958496, - "logits/rejected": -3.029050588607788, - "logps/chosen": -0.4077525734901428, - "logps/rejected": -1.3952513933181763, - "loss": 0.2888, - "nll_loss": 0.2748258709907532, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.02038763090968132, - "rewards/margins": 0.04937494546175003, - "rewards/rejected": -0.06976256519556046, - "step": 1000 - }, - { - "epoch": 1.048767697954903, - "eval_log_odds_chosen": 0.37935417890548706, - "eval_log_odds_ratio": -0.6318228840827942, - "eval_logits/chosen": -3.0189764499664307, - "eval_logits/rejected": -3.0171284675598145, - "eval_logps/chosen": -0.8724088072776794, - "eval_logps/rejected": -1.112794280052185, - "eval_loss": 0.5400179028511047, - "eval_nll_loss": 0.5058131814002991, - "eval_rewards/accuracies": 0.6428571343421936, - "eval_rewards/chosen": -0.04362044483423233, - "eval_rewards/margins": 0.012019270099699497, - "eval_rewards/rejected": -0.05563971400260925, - "eval_runtime": 136.9938, - "eval_samples_per_second": 14.555, - "eval_steps_per_second": 0.46, - "step": 1000 - }, - { - "epoch": 1.059255374934452, - "grad_norm": 1.8526210480251912, - "learning_rate": 6.2931677552755265e-06, - "log_odds_chosen": 1.7620799541473389, - "log_odds_ratio": -0.23190836608409882, - "logits/chosen": -3.0539023876190186, - "logits/rejected": -3.0629706382751465, - "logps/chosen": -0.43785715103149414, - "logps/rejected": -1.3722269535064697, - "loss": 0.2859, - "nll_loss": 0.2769049108028412, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.021892856806516647, - "rewards/margins": 0.04671848937869072, - "rewards/rejected": -0.06861135363578796, - "step": 1010 - }, - { - "epoch": 1.069743051914001, - "grad_norm": 2.017775428059147, - "learning_rate": 6.262242910851496e-06, - "log_odds_chosen": 1.7232574224472046, - "log_odds_ratio": -0.22979629039764404, - "logits/chosen": -3.0019690990448, - "logits/rejected": -3.0224807262420654, - "logps/chosen": -0.4002920091152191, - "logps/rejected": -1.3048107624053955, - "loss": 0.2894, - "nll_loss": 0.2588661015033722, - "rewards/accuracies": 0.956250011920929, - "rewards/chosen": -0.020014600828289986, - "rewards/margins": 0.04522594064474106, - "rewards/rejected": -0.0652405396103859, - "step": 1020 - }, - { - "epoch": 1.08023072889355, - "grad_norm": 2.1656896077764, - "learning_rate": 6.231769528497559e-06, - "log_odds_chosen": 1.7999454736709595, - "log_odds_ratio": -0.23009638488292694, - "logits/chosen": -3.0344815254211426, - "logits/rejected": -3.0285098552703857, - "logps/chosen": -0.42475366592407227, - "logps/rejected": -1.3811571598052979, - "loss": 0.2779, - "nll_loss": 0.26928776502609253, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.021237684413790703, - "rewards/margins": 0.04782017320394516, - "rewards/rejected": -0.06905786693096161, - "step": 1030 - }, - { - "epoch": 1.0907184058730992, - "grad_norm": 1.8893124181143397, - "learning_rate": 6.2017367294604225e-06, - "log_odds_chosen": 1.7361199855804443, - "log_odds_ratio": -0.2356552630662918, - "logits/chosen": -2.9798855781555176, - "logits/rejected": -3.012021780014038, - "logps/chosen": -0.4087589383125305, - "logps/rejected": -1.318456768989563, - "loss": 0.2848, - "nll_loss": 0.2693423926830292, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.020437946543097496, - "rewards/margins": 0.045484889298677444, - "rewards/rejected": -0.06592283397912979, - "step": 1040 - }, - { - "epoch": 1.1012060828526482, - "grad_norm": 1.998285617344112, - "learning_rate": 6.172133998483677e-06, - "log_odds_chosen": 1.989933967590332, - "log_odds_ratio": -0.2104463130235672, - "logits/chosen": -2.9669861793518066, - "logits/rejected": -2.992997169494629, - "logps/chosen": -0.4091659486293793, - "logps/rejected": -1.4872965812683105, - "loss": 0.2793, - "nll_loss": 0.24384136497974396, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.020458297803997993, - "rewards/margins": 0.05390653759241104, - "rewards/rejected": -0.07436482608318329, - "step": 1050 - }, - { - "epoch": 1.1116937598321972, - "grad_norm": 1.99753785316238, - "learning_rate": 6.142951168339513e-06, - "log_odds_chosen": 1.7905690670013428, - "log_odds_ratio": -0.2465437948703766, - "logits/chosen": -2.9944257736206055, - "logits/rejected": -2.988699436187744, - "logps/chosen": -0.41175705194473267, - "logps/rejected": -1.3037220239639282, - "loss": 0.2828, - "nll_loss": 0.2829252779483795, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.020587850362062454, - "rewards/margins": 0.04459824413061142, - "rewards/rejected": -0.06518609821796417, - "step": 1060 - }, - { - "epoch": 1.1221814368117462, - "grad_norm": 2.0944607329795666, - "learning_rate": 6.114178405157431e-06, - "log_odds_chosen": 1.972241759300232, - "log_odds_ratio": -0.202741339802742, - "logits/chosen": -2.9314074516296387, - "logits/rejected": -2.943037271499634, - "logps/chosen": -0.39666005969047546, - "logps/rejected": -1.4398232698440552, - "loss": 0.2869, - "nll_loss": 0.26206424832344055, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.019833002239465714, - "rewards/margins": 0.0521581657230854, - "rewards/rejected": -0.07199116796255112, - "step": 1070 - }, - { - "epoch": 1.1326691137912952, - "grad_norm": 2.082309850512046, - "learning_rate": 6.0858061945018455e-06, - "log_odds_chosen": 1.9569040536880493, - "log_odds_ratio": -0.20189175009727478, - "logits/chosen": -2.9233288764953613, - "logits/rejected": -2.953047275543213, - "logps/chosen": -0.4349672198295593, - "logps/rejected": -1.479813814163208, - "loss": 0.286, - "nll_loss": 0.25732284784317017, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.021748360246419907, - "rewards/margins": 0.052242327481508255, - "rewards/rejected": -0.07399068772792816, - "step": 1080 - }, - { - "epoch": 1.1431567907708442, - "grad_norm": 1.977872551014816, - "learning_rate": 6.0578253281538265e-06, - "log_odds_chosen": 1.8792686462402344, - "log_odds_ratio": -0.23301272094249725, - "logits/chosen": -2.9573769569396973, - "logits/rejected": -2.968686103820801, - "logps/chosen": -0.3683982789516449, - "logps/rejected": -1.286027431488037, - "loss": 0.2841, - "nll_loss": 0.26943594217300415, - "rewards/accuracies": 0.956250011920929, - "rewards/chosen": -0.018419915810227394, - "rewards/margins": 0.04588145762681961, - "rewards/rejected": -0.06430138647556305, - "step": 1090 - }, - { - "epoch": 1.1536444677503932, - "grad_norm": 2.2874664942911984, - "learning_rate": 6.030226891555273e-06, - "log_odds_chosen": 1.744699239730835, - "log_odds_ratio": -0.2575313448905945, - "logits/chosen": -3.0328478813171387, - "logits/rejected": -3.0531229972839355, - "logps/chosen": -0.4480053782463074, - "logps/rejected": -1.409203290939331, - "loss": 0.29, - "nll_loss": 0.2910405397415161, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.022400271147489548, - "rewards/margins": 0.04805989935994148, - "rewards/rejected": -0.07046017050743103, - "step": 1100 - }, - { - "epoch": 1.1536444677503932, - "eval_log_odds_chosen": 0.4246710240840912, - "eval_log_odds_ratio": -0.6255837082862854, - "eval_logits/chosen": -3.002875804901123, - "eval_logits/rejected": -3.0027201175689697, - "eval_logps/chosen": -0.8736297488212585, - "eval_logps/rejected": -1.1487443447113037, - "eval_loss": 0.5385290384292603, - "eval_nll_loss": 0.5041735172271729, - "eval_rewards/accuracies": 0.64682537317276, - "eval_rewards/chosen": -0.04368148371577263, - "eval_rewards/margins": 0.013755732215940952, - "eval_rewards/rejected": -0.057437218725681305, - "eval_runtime": 136.8823, - "eval_samples_per_second": 14.567, - "eval_steps_per_second": 0.46, - "step": 1100 - }, - { - "epoch": 1.1641321447299422, - "grad_norm": 1.8147231314332177, - "learning_rate": 6.003002251876643e-06, - "log_odds_chosen": 1.8075166940689087, - "log_odds_ratio": -0.2281859815120697, - "logits/chosen": -2.965421199798584, - "logits/rejected": -3.0172793865203857, - "logps/chosen": -0.44597238302230835, - "logps/rejected": -1.4203885793685913, - "loss": 0.2891, - "nll_loss": 0.2668479084968567, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.022298619151115417, - "rewards/margins": 0.04872080683708191, - "rewards/rejected": -0.07101943343877792, - "step": 1110 - }, - { - "epoch": 1.1746198217094914, - "grad_norm": 1.9969430269469466, - "learning_rate": 5.976143046671968e-06, - "log_odds_chosen": 1.7478984594345093, - "log_odds_ratio": -0.22862455248832703, - "logits/chosen": -3.0243489742279053, - "logits/rejected": -3.0321333408355713, - "logps/chosen": -0.40696269273757935, - "logps/rejected": -1.2988313436508179, - "loss": 0.2927, - "nll_loss": 0.27604612708091736, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02034812793135643, - "rewards/margins": 0.04459343105554581, - "rewards/rejected": -0.06494157016277313, - "step": 1120 - }, - { - "epoch": 1.1851074986890404, - "grad_norm": 2.1896703421371275, - "learning_rate": 5.949641173087296e-06, - "log_odds_chosen": 2.048767566680908, - "log_odds_ratio": -0.20188426971435547, - "logits/chosen": -2.9657158851623535, - "logits/rejected": -2.977405309677124, - "logps/chosen": -0.38311532139778137, - "logps/rejected": -1.454978108406067, - "loss": 0.2825, - "nll_loss": 0.2597211003303528, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.01915576681494713, - "rewards/margins": 0.05359314754605293, - "rewards/rejected": -0.07274890691041946, - "step": 1130 - }, - { - "epoch": 1.1955951756685894, - "grad_norm": 1.8856822247943528, - "learning_rate": 5.923488777590924e-06, - "log_odds_chosen": 1.9368520975112915, - "log_odds_ratio": -0.21634550392627716, - "logits/chosen": -3.009665012359619, - "logits/rejected": -3.0066471099853516, - "logps/chosen": -0.412930428981781, - "logps/rejected": -1.4850547313690186, - "loss": 0.2786, - "nll_loss": 0.28015536069869995, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.02064652182161808, - "rewards/margins": 0.05360621213912964, - "rewards/rejected": -0.07425273954868317, - "step": 1140 - }, - { - "epoch": 1.2060828526481384, - "grad_norm": 2.2165729739830233, - "learning_rate": 5.897678246195886e-06, - "log_odds_chosen": 1.9798767566680908, - "log_odds_ratio": -0.19855430722236633, - "logits/chosen": -2.9805493354797363, - "logits/rejected": -2.9919371604919434, - "logps/chosen": -0.38313865661621094, - "logps/rejected": -1.3864378929138184, - "loss": 0.2909, - "nll_loss": 0.27790573239326477, - "rewards/accuracies": 0.956250011920929, - "rewards/chosen": -0.019156932830810547, - "rewards/margins": 0.05016495659947395, - "rewards/rejected": -0.0693218931555748, - "step": 1150 - }, - { - "epoch": 1.2165705296276874, - "grad_norm": 2.8337045840850497, - "learning_rate": 5.8722021951470355e-06, - "log_odds_chosen": 1.7361915111541748, - "log_odds_ratio": -0.24711327254772186, - "logits/chosen": -2.966083288192749, - "logits/rejected": -2.9842519760131836, - "logps/chosen": -0.4412474036216736, - "logps/rejected": -1.3824529647827148, - "loss": 0.2781, - "nll_loss": 0.2754039466381073, - "rewards/accuracies": 0.9375, - "rewards/chosen": -0.02206237055361271, - "rewards/margins": 0.0470602810382843, - "rewards/rejected": -0.06912264972925186, - "step": 1160 - }, - { - "epoch": 1.2270582066072364, - "grad_norm": 1.7729938432799273, - "learning_rate": 5.847053462046862e-06, - "log_odds_chosen": 1.7805134057998657, - "log_odds_ratio": -0.23545412719249725, - "logits/chosen": -3.0085816383361816, - "logits/rejected": -3.003875494003296, - "logps/chosen": -0.4123718738555908, - "logps/rejected": -1.3221479654312134, - "loss": 0.2829, - "nll_loss": 0.2879020869731903, - "rewards/accuracies": 0.9375, - "rewards/chosen": -0.02061859332025051, - "rewards/margins": 0.045488808304071426, - "rewards/rejected": -0.06610739976167679, - "step": 1170 - }, - { - "epoch": 1.2375458835867854, - "grad_norm": 2.2169036925519454, - "learning_rate": 5.822225097395821e-06, - "log_odds_chosen": 1.9844211339950562, - "log_odds_ratio": -0.1866404265165329, - "logits/chosen": -2.9880988597869873, - "logits/rejected": -3.0081310272216797, - "logps/chosen": -0.3858886957168579, - "logps/rejected": -1.3924882411956787, - "loss": 0.2873, - "nll_loss": 0.25162869691848755, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.019294437021017075, - "rewards/margins": 0.05032998323440552, - "rewards/rejected": -0.06962442398071289, - "step": 1180 - }, - { - "epoch": 1.2480335605663346, - "grad_norm": 2.1614361138819045, - "learning_rate": 5.797710356524486e-06, - "log_odds_chosen": 1.8616158962249756, - "log_odds_ratio": -0.22632256150245667, - "logits/chosen": -3.0017178058624268, - "logits/rejected": -3.0013363361358643, - "logps/chosen": -0.4442955553531647, - "logps/rejected": -1.4363129138946533, - "loss": 0.2867, - "nll_loss": 0.289310485124588, - "rewards/accuracies": 0.956250011920929, - "rewards/chosen": -0.022214777767658234, - "rewards/margins": 0.04960086941719055, - "rewards/rejected": -0.07181564718484879, - "step": 1190 - }, - { - "epoch": 1.2585212375458836, - "grad_norm": 2.0470229728313494, - "learning_rate": 5.773502691896259e-06, - "log_odds_chosen": 1.8614075183868408, - "log_odds_ratio": -0.2429337054491043, - "logits/chosen": -2.9596099853515625, - "logits/rejected": -2.9728147983551025, - "logps/chosen": -0.44122061133384705, - "logps/rejected": -1.4644559621810913, - "loss": 0.2826, - "nll_loss": 0.2614334225654602, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.022061031311750412, - "rewards/margins": 0.051161766052246094, - "rewards/rejected": -0.0732228010892868, - "step": 1200 - }, - { - "epoch": 1.2585212375458836, - "eval_log_odds_chosen": 0.4214767515659332, - "eval_log_odds_ratio": -0.6254101991653442, - "eval_logits/chosen": -2.9582858085632324, - "eval_logits/rejected": -2.96195912361145, - "eval_logps/chosen": -0.8853804469108582, - "eval_logps/rejected": -1.162561058998108, - "eval_loss": 0.5427829027175903, - "eval_nll_loss": 0.5084435939788818, - "eval_rewards/accuracies": 0.6428571343421936, - "eval_rewards/chosen": -0.04426902160048485, - "eval_rewards/margins": 0.013859033584594727, - "eval_rewards/rejected": -0.05812805891036987, - "eval_runtime": 137.2006, - "eval_samples_per_second": 14.533, - "eval_steps_per_second": 0.459, - "step": 1200 - }, - { - "epoch": 1.2690089145254326, - "grad_norm": 2.3388472125063946, - "learning_rate": 5.749595745760691e-06, - "log_odds_chosen": 1.858030080795288, - "log_odds_ratio": -0.21272964775562286, - "logits/chosen": -2.996577739715576, - "logits/rejected": -3.0146660804748535, - "logps/chosen": -0.4070938229560852, - "logps/rejected": -1.3386101722717285, - "loss": 0.2988, - "nll_loss": 0.292961448431015, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02035469003021717, - "rewards/margins": 0.04657582566142082, - "rewards/rejected": -0.06693051755428314, - "step": 1210 - }, - { - "epoch": 1.2794965915049816, - "grad_norm": 1.9762440493042526, - "learning_rate": 5.725983343138682e-06, - "log_odds_chosen": 1.7544046640396118, - "log_odds_ratio": -0.22841353714466095, - "logits/chosen": -2.9734439849853516, - "logits/rejected": -2.9992988109588623, - "logps/chosen": -0.42544227838516235, - "logps/rejected": -1.3273015022277832, - "loss": 0.295, - "nll_loss": 0.28989139199256897, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.021272115409374237, - "rewards/margins": 0.04509295895695686, - "rewards/rejected": -0.0663650780916214, - "step": 1220 - }, - { - "epoch": 1.2899842684845306, - "grad_norm": 2.230074491318477, - "learning_rate": 5.702659485122011e-06, - "log_odds_chosen": 1.929265022277832, - "log_odds_ratio": -0.20951807498931885, - "logits/chosen": -2.9871158599853516, - "logits/rejected": -2.993727207183838, - "logps/chosen": -0.40125927329063416, - "logps/rejected": -1.4160717725753784, - "loss": 0.2653, - "nll_loss": 0.23026029765605927, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.020062964409589767, - "rewards/margins": 0.050740621984004974, - "rewards/rejected": -0.07080359011888504, - "step": 1230 - }, - { - "epoch": 1.3004719454640796, - "grad_norm": 1.9679461376203173, - "learning_rate": 5.679618342470648e-06, - "log_odds_chosen": 1.7371532917022705, - "log_odds_ratio": -0.2242734134197235, - "logits/chosen": -3.0132291316986084, - "logits/rejected": -3.0433402061462402, - "logps/chosen": -0.413210391998291, - "logps/rejected": -1.3000330924987793, - "loss": 0.2804, - "nll_loss": 0.29589781165122986, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.0206605214625597, - "rewards/margins": 0.04434113949537277, - "rewards/rejected": -0.06500165909528732, - "step": 1240 - }, - { - "epoch": 1.3109596224436286, - "grad_norm": 2.617277483095543, - "learning_rate": 5.656854249492381e-06, - "log_odds_chosen": 1.814679741859436, - "log_odds_ratio": -0.22298629581928253, - "logits/chosen": -2.996896266937256, - "logits/rejected": -3.0056145191192627, - "logps/chosen": -0.42395251989364624, - "logps/rejected": -1.3927456140518188, - "loss": 0.2687, - "nll_loss": 0.25607752799987793, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.021197626367211342, - "rewards/margins": 0.04843965172767639, - "rewards/rejected": -0.06963728368282318, - "step": 1250 - }, - { - "epoch": 1.3214472994231778, - "grad_norm": 1.9773184888291742, - "learning_rate": 5.63436169819011e-06, - "log_odds_chosen": 1.8136640787124634, - "log_odds_ratio": -0.24320077896118164, - "logits/chosen": -2.966784954071045, - "logits/rejected": -3.001746892929077, - "logps/chosen": -0.45541706681251526, - "logps/rejected": -1.3951488733291626, - "loss": 0.2988, - "nll_loss": 0.31274476647377014, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.022770855575799942, - "rewards/margins": 0.046986598521471024, - "rewards/rejected": -0.06975744664669037, - "step": 1260 - }, - { - "epoch": 1.3319349764027268, - "grad_norm": 1.9140818928985086, - "learning_rate": 5.612135332663138e-06, - "log_odds_chosen": 1.953155755996704, - "log_odds_ratio": -0.21717992424964905, - "logits/chosen": -3.006328821182251, - "logits/rejected": -3.037388324737549, - "logps/chosen": -0.42650872468948364, - "logps/rejected": -1.495060682296753, - "loss": 0.272, - "nll_loss": 0.2669217586517334, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.021325435489416122, - "rewards/margins": 0.053427595645189285, - "rewards/rejected": -0.074753038585186, - "step": 1270 - }, - { - "epoch": 1.3424226533822758, - "grad_norm": 1.9500186785754579, - "learning_rate": 5.590169943749475e-06, - "log_odds_chosen": 1.8904393911361694, - "log_odds_ratio": -0.2255454808473587, - "logits/chosen": -2.989861011505127, - "logits/rejected": -3.0198075771331787, - "logps/chosen": -0.424043744802475, - "logps/rejected": -1.4651858806610107, - "loss": 0.2783, - "nll_loss": 0.267769455909729, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.02120218798518181, - "rewards/margins": 0.05205710977315903, - "rewards/rejected": -0.07325930893421173, - "step": 1280 - }, - { - "epoch": 1.3529103303618248, - "grad_norm": 1.9502765281924526, - "learning_rate": 5.568460463897046e-06, - "log_odds_chosen": 1.8929240703582764, - "log_odds_ratio": -0.21857920289039612, - "logits/chosen": -2.9535863399505615, - "logits/rejected": -2.9874510765075684, - "logps/chosen": -0.45026451349258423, - "logps/rejected": -1.4960235357284546, - "loss": 0.295, - "nll_loss": 0.27629774808883667, - "rewards/accuracies": 0.956250011920929, - "rewards/chosen": -0.022513221949338913, - "rewards/margins": 0.052287958562374115, - "rewards/rejected": -0.07480116933584213, - "step": 1290 - }, - { - "epoch": 1.3633980073413738, - "grad_norm": 2.2093191033587223, - "learning_rate": 5.547001962252292e-06, - "log_odds_chosen": 1.7265195846557617, - "log_odds_ratio": -0.23279574513435364, - "logits/chosen": -2.9012649059295654, - "logits/rejected": -2.9128100872039795, - "logps/chosen": -0.4365314841270447, - "logps/rejected": -1.3402652740478516, - "loss": 0.2796, - "nll_loss": 0.28851714730262756, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.021826574578881264, - "rewards/margins": 0.04518669471144676, - "rewards/rejected": -0.06701326370239258, - "step": 1300 - }, - { - "epoch": 1.3633980073413738, - "eval_log_odds_chosen": 0.45076510310173035, - "eval_log_odds_ratio": -0.6208177208900452, - "eval_logits/chosen": -2.928496837615967, - "eval_logits/rejected": -2.9256343841552734, - "eval_logps/chosen": -0.8825219869613647, - "eval_logps/rejected": -1.1770830154418945, - "eval_loss": 0.5392885208129883, - "eval_nll_loss": 0.5060464143753052, - "eval_rewards/accuracies": 0.64682537317276, - "eval_rewards/chosen": -0.044126104563474655, - "eval_rewards/margins": 0.014728044159710407, - "eval_rewards/rejected": -0.05885414779186249, - "eval_runtime": 136.6608, - "eval_samples_per_second": 14.591, - "eval_steps_per_second": 0.461, - "step": 1300 - }, - { - "epoch": 1.3738856843209228, - "grad_norm": 1.715926192038861, - "learning_rate": 5.525789639955377e-06, - "log_odds_chosen": 2.0803933143615723, - "log_odds_ratio": -0.21633043885231018, - "logits/chosen": -2.926987409591675, - "logits/rejected": -2.9622962474823, - "logps/chosen": -0.43519288301467896, - "logps/rejected": -1.6533997058868408, - "loss": 0.2713, - "nll_loss": 0.26452213525772095, - "rewards/accuracies": 0.956250011920929, - "rewards/chosen": -0.02175964042544365, - "rewards/margins": 0.06091034412384033, - "rewards/rejected": -0.08266998082399368, - "step": 1310 - }, - { - "epoch": 1.3843733613004718, - "grad_norm": 2.0174814570503012, - "learning_rate": 5.504818825631804e-06, - "log_odds_chosen": 2.108902931213379, - "log_odds_ratio": -0.1835678517818451, - "logits/chosen": -2.96756911277771, - "logits/rejected": -2.9531686305999756, - "logps/chosen": -0.3781605362892151, - "logps/rejected": -1.4976880550384521, - "loss": 0.267, - "nll_loss": 0.25148090720176697, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.018908025696873665, - "rewards/margins": 0.055976372212171555, - "rewards/rejected": -0.07488439977169037, - "step": 1320 - }, - { - "epoch": 1.394861038280021, - "grad_norm": 2.317364085817375, - "learning_rate": 5.484084971070817e-06, - "log_odds_chosen": 1.9238555431365967, - "log_odds_ratio": -0.2074807584285736, - "logits/chosen": -2.923131227493286, - "logits/rejected": -2.9520606994628906, - "logps/chosen": -0.42446833848953247, - "logps/rejected": -1.4086004495620728, - "loss": 0.2852, - "nll_loss": 0.28959181904792786, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.021223418414592743, - "rewards/margins": 0.049206603318452835, - "rewards/rejected": -0.07043002545833588, - "step": 1330 - }, - { - "epoch": 1.40534871525957, - "grad_norm": 2.165975215343917, - "learning_rate": 5.4635836470815305e-06, - "log_odds_chosen": 1.8837333917617798, - "log_odds_ratio": -0.21855314075946808, - "logits/chosen": -2.9127135276794434, - "logits/rejected": -2.9249043464660645, - "logps/chosen": -0.41960373520851135, - "logps/rejected": -1.4260175228118896, - "loss": 0.2787, - "nll_loss": 0.25244617462158203, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.020980186760425568, - "rewards/margins": 0.050320692360401154, - "rewards/rejected": -0.07130087912082672, - "step": 1340 - }, - { - "epoch": 1.415836392239119, - "grad_norm": 1.9224928940953034, - "learning_rate": 5.443310539518174e-06, - "log_odds_chosen": 2.056159734725952, - "log_odds_ratio": -0.19483168423175812, - "logits/chosen": -2.956674814224243, - "logits/rejected": -2.9572062492370605, - "logps/chosen": -0.4208443760871887, - "logps/rejected": -1.5285457372665405, - "loss": 0.2822, - "nll_loss": 0.26951080560684204, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.021042218431830406, - "rewards/margins": 0.05538507178425789, - "rewards/rejected": -0.07642728835344315, - "step": 1350 - }, - { - "epoch": 1.426324069218668, - "grad_norm": 2.0115204434239025, - "learning_rate": 5.423261445466404e-06, - "log_odds_chosen": 1.707457184791565, - "log_odds_ratio": -0.2479782998561859, - "logits/chosen": -2.915250301361084, - "logits/rejected": -2.9445343017578125, - "logps/chosen": -0.4267791211605072, - "logps/rejected": -1.3377535343170166, - "loss": 0.2925, - "nll_loss": 0.29825955629348755, - "rewards/accuracies": 0.9312499761581421, - "rewards/chosen": -0.02133895456790924, - "rewards/margins": 0.04554871469736099, - "rewards/rejected": -0.06688766926527023, - "step": 1360 - }, - { - "epoch": 1.436811746198217, - "grad_norm": 2.0083912520624234, - "learning_rate": 5.403432269582992e-06, - "log_odds_chosen": 1.7433815002441406, - "log_odds_ratio": -0.23284384608268738, - "logits/chosen": -2.9682974815368652, - "logits/rejected": -2.9809725284576416, - "logps/chosen": -0.4545938968658447, - "logps/rejected": -1.3821640014648438, - "loss": 0.2995, - "nll_loss": 0.2861328721046448, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.022729698568582535, - "rewards/margins": 0.04637850075960159, - "rewards/rejected": -0.06910820305347443, - "step": 1370 - }, - { - "epoch": 1.447299423177766, - "grad_norm": 1.918494069287167, - "learning_rate": 5.383819020581656e-06, - "log_odds_chosen": 1.839255690574646, - "log_odds_ratio": -0.22518055140972137, - "logits/chosen": -2.9555628299713135, - "logits/rejected": -2.968390703201294, - "logps/chosen": -0.4370731711387634, - "logps/rejected": -1.4699593782424927, - "loss": 0.2859, - "nll_loss": 0.28876128792762756, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.021853657439351082, - "rewards/margins": 0.051644302904605865, - "rewards/rejected": -0.0734979659318924, - "step": 1380 - }, - { - "epoch": 1.457787100157315, - "grad_norm": 1.8701436058229068, - "learning_rate": 5.364417807858201e-06, - "log_odds_chosen": 2.0006766319274902, - "log_odds_ratio": -0.19503512978553772, - "logits/chosen": -2.9456233978271484, - "logits/rejected": -2.9416487216949463, - "logps/chosen": -0.397217720746994, - "logps/rejected": -1.458070993423462, - "loss": 0.2898, - "nll_loss": 0.2990682125091553, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.01986088417470455, - "rewards/margins": 0.05304265767335892, - "rewards/rejected": -0.07290354371070862, - "step": 1390 - }, - { - "epoch": 1.4682747771368643, - "grad_norm": 1.8947645182805886, - "learning_rate": 5.345224838248489e-06, - "log_odds_chosen": 1.9478137493133545, - "log_odds_ratio": -0.22849062085151672, - "logits/chosen": -2.9488446712493896, - "logits/rejected": -2.980994462966919, - "logps/chosen": -0.38306254148483276, - "logps/rejected": -1.40244460105896, - "loss": 0.2784, - "nll_loss": 0.27079683542251587, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.019153129309415817, - "rewards/margins": 0.05096910148859024, - "rewards/rejected": -0.07012222707271576, - "step": 1400 - }, - { - "epoch": 1.4682747771368643, - "eval_log_odds_chosen": 0.4410339295864105, - "eval_log_odds_ratio": -0.6236060261726379, - "eval_logits/chosen": -2.9594457149505615, - "eval_logits/rejected": -2.9583115577697754, - "eval_logps/chosen": -0.8884981274604797, - "eval_logps/rejected": -1.1784039735794067, - "eval_loss": 0.5364598631858826, - "eval_nll_loss": 0.5036527514457703, - "eval_rewards/accuracies": 0.6527777910232544, - "eval_rewards/chosen": -0.044424910098314285, - "eval_rewards/margins": 0.014495291747152805, - "eval_rewards/rejected": -0.058920200914144516, - "eval_runtime": 139.2595, - "eval_samples_per_second": 14.319, - "eval_steps_per_second": 0.452, - "step": 1400 - }, - { - "epoch": 1.4787624541164133, - "grad_norm": 2.1665159464201142, - "learning_rate": 5.326236412913075e-06, - "log_odds_chosen": 1.7970411777496338, - "log_odds_ratio": -0.2380552738904953, - "logits/chosen": -2.9149088859558105, - "logits/rejected": -2.9543135166168213, - "logps/chosen": -0.4362480640411377, - "logps/rejected": -1.3472230434417725, - "loss": 0.29, - "nll_loss": 0.2710421681404114, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.021812403574585915, - "rewards/margins": 0.04554874822497368, - "rewards/rejected": -0.06736114621162415, - "step": 1410 - }, - { - "epoch": 1.4892501310959623, - "grad_norm": 2.196966160421767, - "learning_rate": 5.307448924342753e-06, - "log_odds_chosen": 1.8308820724487305, - "log_odds_ratio": -0.21477296948432922, - "logits/chosen": -2.877204179763794, - "logits/rejected": -2.932901620864868, - "logps/chosen": -0.4031652510166168, - "logps/rejected": -1.3179484605789185, - "loss": 0.2855, - "nll_loss": 0.2783321738243103, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.02015826478600502, - "rewards/margins": 0.04573915898799896, - "rewards/rejected": -0.06589742004871368, - "step": 1420 - }, - { - "epoch": 1.4997378080755113, - "grad_norm": 2.1884907491879084, - "learning_rate": 5.28885885347945e-06, - "log_odds_chosen": 1.9711707830429077, - "log_odds_ratio": -0.20648148655891418, - "logits/chosen": -2.954136371612549, - "logits/rejected": -2.9814727306365967, - "logps/chosen": -0.41374531388282776, - "logps/rejected": -1.4304702281951904, - "loss": 0.2924, - "nll_loss": 0.27289509773254395, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.020687269046902657, - "rewards/margins": 0.0508362352848053, - "rewards/rejected": -0.071523517370224, - "step": 1430 - }, - { - "epoch": 1.5102254850550603, - "grad_norm": 2.124176001387226, - "learning_rate": 5.270462766947299e-06, - "log_odds_chosen": 1.7731349468231201, - "log_odds_ratio": -0.2392440289258957, - "logits/chosen": -2.9405388832092285, - "logits/rejected": -2.9464943408966064, - "logps/chosen": -0.4539235234260559, - "logps/rejected": -1.403793454170227, - "loss": 0.2961, - "nll_loss": 0.2940642237663269, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.022696174681186676, - "rewards/margins": 0.04749349504709244, - "rewards/rejected": -0.07018966972827911, - "step": 1440 - }, - { - "epoch": 1.5207131620346095, - "grad_norm": 1.8197825407446042, - "learning_rate": 5.252257314388902e-06, - "log_odds_chosen": 1.7956994771957397, - "log_odds_ratio": -0.22454524040222168, - "logits/chosen": -2.954716444015503, - "logits/rejected": -2.978447437286377, - "logps/chosen": -0.4430459439754486, - "logps/rejected": -1.4194531440734863, - "loss": 0.2777, - "nll_loss": 0.24652138352394104, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02215229719877243, - "rewards/margins": 0.048820365220308304, - "rewards/rejected": -0.07097266614437103, - "step": 1450 - }, - { - "epoch": 1.5312008390141583, - "grad_norm": 2.1915818543360355, - "learning_rate": 5.234239225902137e-06, - "log_odds_chosen": 1.9382715225219727, - "log_odds_ratio": -0.1963178515434265, - "logits/chosen": -2.8938894271850586, - "logits/rejected": -2.924325466156006, - "logps/chosen": -0.39880725741386414, - "logps/rejected": -1.4752063751220703, - "loss": 0.2971, - "nll_loss": 0.2676003575325012, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.019940361380577087, - "rewards/margins": 0.05381995439529419, - "rewards/rejected": -0.07376032322645187, - "step": 1460 - }, - { - "epoch": 1.5416885159937075, - "grad_norm": 2.1118618734250307, - "learning_rate": 5.216405309573011e-06, - "log_odds_chosen": 1.9139398336410522, - "log_odds_ratio": -0.19271975755691528, - "logits/chosen": -3.0117218494415283, - "logits/rejected": -3.0411810874938965, - "logps/chosen": -0.42149630188941956, - "logps/rejected": -1.471760869026184, - "loss": 0.2889, - "nll_loss": 0.27934783697128296, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.0210748128592968, - "rewards/margins": 0.052513234317302704, - "rewards/rejected": -0.0735880434513092, - "step": 1470 - }, - { - "epoch": 1.5521761929732563, - "grad_norm": 2.0510895547316745, - "learning_rate": 5.198752449100364e-06, - "log_odds_chosen": 2.0376482009887695, - "log_odds_ratio": -0.19703765213489532, - "logits/chosen": -3.009754180908203, - "logits/rejected": -3.016758441925049, - "logps/chosen": -0.40712347626686096, - "logps/rejected": -1.459837555885315, - "loss": 0.2888, - "nll_loss": 0.3001149892807007, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.020356174558401108, - "rewards/margins": 0.05263570696115494, - "rewards/rejected": -0.07299187034368515, - "step": 1480 - }, - { - "epoch": 1.5626638699528055, - "grad_norm": 2.1669568438399684, - "learning_rate": 5.181277601508398e-06, - "log_odds_chosen": 1.8304507732391357, - "log_odds_ratio": -0.2394884079694748, - "logits/chosen": -2.9779343605041504, - "logits/rejected": -3.008795738220215, - "logps/chosen": -0.4576667249202728, - "logps/rejected": -1.4601542949676514, - "loss": 0.2888, - "nll_loss": 0.29476073384284973, - "rewards/accuracies": 0.9375, - "rewards/chosen": -0.0228833369910717, - "rewards/margins": 0.05012437701225281, - "rewards/rejected": -0.07300771772861481, - "step": 1490 - }, - { - "epoch": 1.5731515469323545, - "grad_norm": 2.372050874462119, - "learning_rate": 5.163977794943223e-06, - "log_odds_chosen": 1.9750179052352905, - "log_odds_ratio": -0.19530083239078522, - "logits/chosen": -2.9395532608032227, - "logits/rejected": -2.991283893585205, - "logps/chosen": -0.42392611503601074, - "logps/rejected": -1.5091795921325684, - "loss": 0.2873, - "nll_loss": 0.2818702757358551, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.021196305751800537, - "rewards/margins": 0.0542626678943634, - "rewards/rejected": -0.07545898109674454, - "step": 1500 - }, - { - "epoch": 1.5731515469323545, - "eval_log_odds_chosen": 0.4364486038684845, - "eval_log_odds_ratio": -0.6225508451461792, - "eval_logits/chosen": -2.965731382369995, - "eval_logits/rejected": -2.966355323791504, - "eval_logps/chosen": -0.8718044757843018, - "eval_logps/rejected": -1.158449649810791, - "eval_loss": 0.53301602602005, - "eval_nll_loss": 0.5004281997680664, - "eval_rewards/accuracies": 0.6448412537574768, - "eval_rewards/chosen": -0.043590229004621506, - "eval_rewards/margins": 0.014332256279885769, - "eval_rewards/rejected": -0.05792247876524925, - "eval_runtime": 139.8515, - "eval_samples_per_second": 14.258, - "eval_steps_per_second": 0.45, - "step": 1500 - }, - { - "epoch": 1.5836392239119035, - "grad_norm": 1.9123802783189798, - "learning_rate": 5.146850126549788e-06, - "log_odds_chosen": 1.6361440420150757, - "log_odds_ratio": -0.26433151960372925, - "logits/chosen": -2.943331003189087, - "logits/rejected": -2.9721503257751465, - "logps/chosen": -0.44553548097610474, - "logps/rejected": -1.2933813333511353, - "loss": 0.3044, - "nll_loss": 0.2870228588581085, - "rewards/accuracies": 0.956250011920929, - "rewards/chosen": -0.022276774048805237, - "rewards/margins": 0.042392291128635406, - "rewards/rejected": -0.06466906517744064, - "step": 1510 - }, - { - "epoch": 1.5941269008914527, - "grad_norm": 1.9978617693896288, - "learning_rate": 5.129891760425772e-06, - "log_odds_chosen": 1.872454285621643, - "log_odds_ratio": -0.21693451702594757, - "logits/chosen": -2.9198169708251953, - "logits/rejected": -2.9594712257385254, - "logps/chosen": -0.4238964915275574, - "logps/rejected": -1.4147742986679077, - "loss": 0.2765, - "nll_loss": 0.2593707740306854, - "rewards/accuracies": 0.9375, - "rewards/chosen": -0.021194826811552048, - "rewards/margins": 0.049543894827365875, - "rewards/rejected": -0.07073871791362762, - "step": 1520 - }, - { - "epoch": 1.6046145778710015, - "grad_norm": 2.2358254561438966, - "learning_rate": 5.113099925649136e-06, - "log_odds_chosen": 1.7420718669891357, - "log_odds_ratio": -0.2600535750389099, - "logits/chosen": -2.9620399475097656, - "logits/rejected": -2.997101068496704, - "logps/chosen": -0.4705958366394043, - "logps/rejected": -1.435579538345337, - "loss": 0.2766, - "nll_loss": 0.28323301672935486, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.023529794067144394, - "rewards/margins": 0.048249177634716034, - "rewards/rejected": -0.07177898287773132, - "step": 1530 - }, - { - "epoch": 1.6151022548505507, - "grad_norm": 2.123071067312132, - "learning_rate": 5.096471914376255e-06, - "log_odds_chosen": 2.0446419715881348, - "log_odds_ratio": -0.20973734557628632, - "logits/chosen": -2.8849668502807617, - "logits/rejected": -2.91094970703125, - "logps/chosen": -0.42269793152809143, - "logps/rejected": -1.4985077381134033, - "loss": 0.2842, - "nll_loss": 0.24874058365821838, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.021134894341230392, - "rewards/margins": 0.053790487349033356, - "rewards/rejected": -0.07492538541555405, - "step": 1540 - }, - { - "epoch": 1.6255899318300995, - "grad_norm": 1.8574119456068037, - "learning_rate": 5.08000508000762e-06, - "log_odds_chosen": 1.8896774053573608, - "log_odds_ratio": -0.2109728306531906, - "logits/chosen": -2.9518914222717285, - "logits/rejected": -2.9677398204803467, - "logps/chosen": -0.42254775762557983, - "logps/rejected": -1.4004069566726685, - "loss": 0.2737, - "nll_loss": 0.26676517724990845, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02112739160656929, - "rewards/margins": 0.04889295622706413, - "rewards/rejected": -0.07002034783363342, - "step": 1550 - }, - { - "epoch": 1.6360776088096487, - "grad_norm": 2.012947859419835, - "learning_rate": 5.0636968354183334e-06, - "log_odds_chosen": 1.7877776622772217, - "log_odds_ratio": -0.2195170670747757, - "logits/chosen": -2.916713237762451, - "logits/rejected": -2.9442696571350098, - "logps/chosen": -0.4229874610900879, - "logps/rejected": -1.3620960712432861, - "loss": 0.2937, - "nll_loss": 0.28985968232154846, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.021149372681975365, - "rewards/margins": 0.046955425292253494, - "rewards/rejected": -0.06810478866100311, - "step": 1560 - }, - { - "epoch": 1.6465652857891977, - "grad_norm": 1.9554610757973563, - "learning_rate": 5.047544651250688e-06, - "log_odds_chosen": 1.9977741241455078, - "log_odds_ratio": -0.22808516025543213, - "logits/chosen": -2.95414137840271, - "logits/rejected": -2.9667911529541016, - "logps/chosen": -0.40563470125198364, - "logps/rejected": -1.493981122970581, - "loss": 0.2746, - "nll_loss": 0.25610029697418213, - "rewards/accuracies": 0.9312499761581421, - "rewards/chosen": -0.020281735807657242, - "rewards/margins": 0.05441732332110405, - "rewards/rejected": -0.07469905912876129, - "step": 1570 - }, - { - "epoch": 1.6570529627687467, - "grad_norm": 2.2417227837369094, - "learning_rate": 5.031546054266276e-06, - "log_odds_chosen": 1.8591692447662354, - "log_odds_ratio": -0.23143061995506287, - "logits/chosen": -3.0023272037506104, - "logits/rejected": -3.0128941535949707, - "logps/chosen": -0.46788668632507324, - "logps/rejected": -1.490392804145813, - "loss": 0.2962, - "nll_loss": 0.31111472845077515, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.02339433692395687, - "rewards/margins": 0.05112530663609505, - "rewards/rejected": -0.07451964914798737, - "step": 1580 - }, - { - "epoch": 1.667540639748296, - "grad_norm": 2.0152925811378846, - "learning_rate": 5.015698625755192e-06, - "log_odds_chosen": 1.9612891674041748, - "log_odds_ratio": -0.22349119186401367, - "logits/chosen": -2.9373695850372314, - "logits/rejected": -2.9659922122955322, - "logps/chosen": -0.40127071738243103, - "logps/rejected": -1.4479907751083374, - "loss": 0.2939, - "nll_loss": 0.2725040912628174, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02006353810429573, - "rewards/margins": 0.05233600735664368, - "rewards/rejected": -0.07239954173564911, - "step": 1590 - }, - { - "epoch": 1.6780283167278447, - "grad_norm": 1.9355725247245243, - "learning_rate": 5e-06, - "log_odds_chosen": 1.8742882013320923, - "log_odds_ratio": -0.21055075526237488, - "logits/chosen": -2.9387471675872803, - "logits/rejected": -2.9844515323638916, - "logps/chosen": -0.43298736214637756, - "logps/rejected": -1.4716593027114868, - "loss": 0.276, - "nll_loss": 0.26002392172813416, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02164936624467373, - "rewards/margins": 0.05193359777331352, - "rewards/rejected": -0.0735829621553421, - "step": 1600 - }, - { - "epoch": 1.6780283167278447, - "eval_log_odds_chosen": 0.4569767117500305, - "eval_log_odds_ratio": -0.6159732937812805, - "eval_logits/chosen": -2.932406187057495, - "eval_logits/rejected": -2.9357593059539795, - "eval_logps/chosen": -0.8832988142967224, - "eval_logps/rejected": -1.1878604888916016, - "eval_loss": 0.5367424488067627, - "eval_nll_loss": 0.5040929913520813, - "eval_rewards/accuracies": 0.6408730149269104, - "eval_rewards/chosen": -0.04416494444012642, - "eval_rewards/margins": 0.015228085219860077, - "eval_rewards/rejected": -0.0593930259346962, - "eval_runtime": 138.0302, - "eval_samples_per_second": 14.446, - "eval_steps_per_second": 0.456, - "step": 1600 - }, - { - "epoch": 1.688515993707394, - "grad_norm": 1.9448584897613828, - "learning_rate": 4.984447862792268e-06, - "log_odds_chosen": 2.0258474349975586, - "log_odds_ratio": -0.2537488639354706, - "logits/chosen": -2.9370341300964355, - "logits/rejected": -2.959137439727783, - "logps/chosen": -0.4205976128578186, - "logps/rejected": -1.51674485206604, - "loss": 0.2805, - "nll_loss": 0.2590489387512207, - "rewards/accuracies": 0.9312499761581421, - "rewards/chosen": -0.0210298802703619, - "rewards/margins": 0.054807353764772415, - "rewards/rejected": -0.07583723217248917, - "step": 1610 - }, - { - "epoch": 1.6990036706869427, - "grad_norm": 2.2985078763398503, - "learning_rate": 4.969039949999534e-06, - "log_odds_chosen": 1.9926655292510986, - "log_odds_ratio": -0.209347203373909, - "logits/chosen": -2.9543755054473877, - "logits/rejected": -2.979072093963623, - "logps/chosen": -0.4242986738681793, - "logps/rejected": -1.527527093887329, - "loss": 0.2829, - "nll_loss": 0.28810399770736694, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.021214932203292847, - "rewards/margins": 0.05516142398118973, - "rewards/rejected": -0.07637635618448257, - "step": 1620 - }, - { - "epoch": 1.709491347666492, - "grad_norm": 1.978508364107179, - "learning_rate": 4.9537740461807e-06, - "log_odds_chosen": 1.7989534139633179, - "log_odds_ratio": -0.22280922532081604, - "logits/chosen": -2.9272611141204834, - "logits/rejected": -2.933403968811035, - "logps/chosen": -0.4125545024871826, - "logps/rejected": -1.371010422706604, - "loss": 0.2723, - "nll_loss": 0.27273207902908325, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.02062772400677204, - "rewards/margins": 0.04792279377579689, - "rewards/rejected": -0.06855051219463348, - "step": 1630 - }, - { - "epoch": 1.719979024646041, - "grad_norm": 2.5294696595366375, - "learning_rate": 4.938647983247949e-06, - "log_odds_chosen": 1.8762280941009521, - "log_odds_ratio": -0.23052379488945007, - "logits/chosen": -2.9176859855651855, - "logits/rejected": -2.937653064727783, - "logps/chosen": -0.4308241307735443, - "logps/rejected": -1.4621460437774658, - "loss": 0.2707, - "nll_loss": 0.24837055802345276, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.021541204303503036, - "rewards/margins": 0.05156610533595085, - "rewards/rejected": -0.07310730963945389, - "step": 1640 - }, - { - "epoch": 1.73046670162559, - "grad_norm": 1.9845638290615137, - "learning_rate": 4.9236596391733095e-06, - "log_odds_chosen": 1.9353539943695068, - "log_odds_ratio": -0.22219491004943848, - "logits/chosen": -2.9324100017547607, - "logits/rejected": -2.9492199420928955, - "logps/chosen": -0.4047132134437561, - "logps/rejected": -1.447388768196106, - "loss": 0.2921, - "nll_loss": 0.2786787152290344, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.020235659554600716, - "rewards/margins": 0.05213377624750137, - "rewards/rejected": -0.07236944139003754, - "step": 1650 - }, - { - "epoch": 1.740954378605139, - "grad_norm": 2.1313335783196914, - "learning_rate": 4.9088069367381605e-06, - "log_odds_chosen": 1.9517314434051514, - "log_odds_ratio": -0.19579176604747772, - "logits/chosen": -2.9807212352752686, - "logits/rejected": -3.004951000213623, - "logps/chosen": -0.4060528874397278, - "logps/rejected": -1.4121928215026855, - "loss": 0.2851, - "nll_loss": 0.27768373489379883, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.020302647724747658, - "rewards/margins": 0.05030699446797371, - "rewards/rejected": -0.07060963660478592, - "step": 1660 - }, - { - "epoch": 1.751442055584688, - "grad_norm": 1.893515732849545, - "learning_rate": 4.894087842323964e-06, - "log_odds_chosen": 1.8834346532821655, - "log_odds_ratio": -0.20945528149604797, - "logits/chosen": -2.9691452980041504, - "logits/rejected": -3.0074009895324707, - "logps/chosen": -0.4027465283870697, - "logps/rejected": -1.374361276626587, - "loss": 0.2926, - "nll_loss": 0.26718848943710327, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.020137326791882515, - "rewards/margins": 0.04858074709773064, - "rewards/rejected": -0.0687180757522583, - "step": 1670 - }, - { - "epoch": 1.7619297325642371, - "grad_norm": 2.0915190498544263, - "learning_rate": 4.8795003647426654e-06, - "log_odds_chosen": 1.8165385723114014, - "log_odds_ratio": -0.21812555193901062, - "logits/chosen": -3.0662589073181152, - "logits/rejected": -3.089877128601074, - "logps/chosen": -0.40138545632362366, - "logps/rejected": -1.3200931549072266, - "loss": 0.2998, - "nll_loss": 0.29331129789352417, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.020069271326065063, - "rewards/margins": 0.04593539237976074, - "rewards/rejected": -0.06600465625524521, - "step": 1680 - }, - { - "epoch": 1.772417409543786, - "grad_norm": 2.1457501870245417, - "learning_rate": 4.865042554105199e-06, - "log_odds_chosen": 1.869539499282837, - "log_odds_ratio": -0.2280159890651703, - "logits/chosen": -2.991488456726074, - "logits/rejected": -2.98630690574646, - "logps/chosen": -0.4090718626976013, - "logps/rejected": -1.36448073387146, - "loss": 0.2858, - "nll_loss": 0.2776942253112793, - "rewards/accuracies": 0.9375, - "rewards/chosen": -0.020453594624996185, - "rewards/margins": 0.04777044430375099, - "rewards/rejected": -0.06822402775287628, - "step": 1690 - }, - { - "epoch": 1.7829050865233351, - "grad_norm": 2.3665022543070093, - "learning_rate": 4.850712500726659e-06, - "log_odds_chosen": 1.9791815280914307, - "log_odds_ratio": -0.19878429174423218, - "logits/chosen": -2.9824297428131104, - "logits/rejected": -3.022101640701294, - "logps/chosen": -0.4144412875175476, - "logps/rejected": -1.4597278833389282, - "loss": 0.2715, - "nll_loss": 0.28446242213249207, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.02072206512093544, - "rewards/margins": 0.052264340221881866, - "rewards/rejected": -0.07298640161752701, - "step": 1700 - }, - { - "epoch": 1.7829050865233351, - "eval_log_odds_chosen": 0.4425116777420044, - "eval_log_odds_ratio": -0.6271889209747314, - "eval_logits/chosen": -3.019425392150879, - "eval_logits/rejected": -3.020922899246216, - "eval_logps/chosen": -0.8710321187973022, - "eval_logps/rejected": -1.1603412628173828, - "eval_loss": 0.5348805785179138, - "eval_nll_loss": 0.5024282336235046, - "eval_rewards/accuracies": 0.6448412537574768, - "eval_rewards/chosen": -0.04355160519480705, - "eval_rewards/margins": 0.014465462416410446, - "eval_rewards/rejected": -0.0580170638859272, - "eval_runtime": 136.3216, - "eval_samples_per_second": 14.627, - "eval_steps_per_second": 0.462, - "step": 1700 - }, - { - "epoch": 1.7933927635028841, - "grad_norm": 1.847904822728325, - "learning_rate": 4.836508334066745e-06, - "log_odds_chosen": 1.9795688390731812, - "log_odds_ratio": -0.2207694798707962, - "logits/chosen": -3.0054497718811035, - "logits/rejected": -3.0154829025268555, - "logps/chosen": -0.4081927239894867, - "logps/rejected": -1.4390795230865479, - "loss": 0.264, - "nll_loss": 0.24716749787330627, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.020409639924764633, - "rewards/margins": 0.051544345915317535, - "rewards/rejected": -0.07195398211479187, - "step": 1710 - }, - { - "epoch": 1.8038804404824331, - "grad_norm": 1.7750027737169987, - "learning_rate": 4.822428221704122e-06, - "log_odds_chosen": 1.926945686340332, - "log_odds_ratio": -0.22434870898723602, - "logits/chosen": -3.0268912315368652, - "logits/rejected": -3.035226583480835, - "logps/chosen": -0.43201422691345215, - "logps/rejected": -1.498827576637268, - "loss": 0.2864, - "nll_loss": 0.25820285081863403, - "rewards/accuracies": 0.9375, - "rewards/chosen": -0.021600713953375816, - "rewards/margins": 0.053340665996074677, - "rewards/rejected": -0.07494138181209564, - "step": 1720 - }, - { - "epoch": 1.8143681174619821, - "grad_norm": 2.0662716537028354, - "learning_rate": 4.8084703683434506e-06, - "log_odds_chosen": 1.974784255027771, - "log_odds_ratio": -0.21157677471637726, - "logits/chosen": -3.010627031326294, - "logits/rejected": -2.9982268810272217, - "logps/chosen": -0.4355824589729309, - "logps/rejected": -1.5232689380645752, - "loss": 0.2903, - "nll_loss": 0.2755037248134613, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.021779123693704605, - "rewards/margins": 0.05438433215022087, - "rewards/rejected": -0.07616344839334488, - "step": 1730 - }, - { - "epoch": 1.8248557944415311, - "grad_norm": 2.1360074988574445, - "learning_rate": 4.794633014853843e-06, - "log_odds_chosen": 1.847333312034607, - "log_odds_ratio": -0.2377551794052124, - "logits/chosen": -3.006833553314209, - "logits/rejected": -3.0122854709625244, - "logps/chosen": -0.4366019368171692, - "logps/rejected": -1.4164003133773804, - "loss": 0.304, - "nll_loss": 0.29017573595046997, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.02183009497821331, - "rewards/margins": 0.0489899218082428, - "rewards/rejected": -0.07082001864910126, - "step": 1740 - }, - { - "epoch": 1.8353434714210803, - "grad_norm": 1.9891927691131213, - "learning_rate": 4.780914437337575e-06, - "log_odds_chosen": 1.8539154529571533, - "log_odds_ratio": -0.23103201389312744, - "logits/chosen": -2.9830121994018555, - "logits/rejected": -2.9818801879882812, - "logps/chosen": -0.4274306297302246, - "logps/rejected": -1.4196858406066895, - "loss": 0.2958, - "nll_loss": 0.2937518060207367, - "rewards/accuracies": 0.956250011920929, - "rewards/chosen": -0.02137153223156929, - "rewards/margins": 0.049612756818532944, - "rewards/rejected": -0.07098428905010223, - "step": 1750 - }, - { - "epoch": 1.8458311484006291, - "grad_norm": 1.827588117065436, - "learning_rate": 4.767312946227961e-06, - "log_odds_chosen": 2.2149860858917236, - "log_odds_ratio": -0.2075362503528595, - "logits/chosen": -2.9530441761016846, - "logits/rejected": -2.9839682579040527, - "logps/chosen": -0.391355037689209, - "logps/rejected": -1.6375446319580078, - "loss": 0.2721, - "nll_loss": 0.2694031000137329, - "rewards/accuracies": 0.9375, - "rewards/chosen": -0.01956775411963463, - "rewards/margins": 0.06230948120355606, - "rewards/rejected": -0.08187723159790039, - "step": 1760 - }, - { - "epoch": 1.8563188253801783, - "grad_norm": 1.8203811521479276, - "learning_rate": 4.7538268854152834e-06, - "log_odds_chosen": 1.7995598316192627, - "log_odds_ratio": -0.244699165225029, - "logits/chosen": -3.011706829071045, - "logits/rejected": -3.024837017059326, - "logps/chosen": -0.4394347071647644, - "logps/rejected": -1.4033676385879517, - "loss": 0.2771, - "nll_loss": 0.25858861207962036, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.02197173610329628, - "rewards/margins": 0.04819665104150772, - "rewards/rejected": -0.0701683908700943, - "step": 1770 - }, - { - "epoch": 1.8668065023597273, - "grad_norm": 2.2623646165216313, - "learning_rate": 4.740454631399773e-06, - "log_odds_chosen": 1.962255835533142, - "log_odds_ratio": -0.23438410460948944, - "logits/chosen": -2.949073314666748, - "logits/rejected": -2.989229202270508, - "logps/chosen": -0.3985145688056946, - "logps/rejected": -1.4544894695281982, - "loss": 0.2941, - "nll_loss": 0.29249390959739685, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.019925730302929878, - "rewards/margins": 0.052798740565776825, - "rewards/rejected": -0.07272447645664215, - "step": 1780 - }, - { - "epoch": 1.8772941793392763, - "grad_norm": 2.5104520915032538, - "learning_rate": 4.727194592470656e-06, - "log_odds_chosen": 2.0800955295562744, - "log_odds_ratio": -0.19981749355793, - "logits/chosen": -2.9771628379821777, - "logits/rejected": -3.0005829334259033, - "logps/chosen": -0.42085084319114685, - "logps/rejected": -1.603994607925415, - "loss": 0.2844, - "nll_loss": 0.2677140235900879, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.02104254439473152, - "rewards/margins": 0.05915718153119087, - "rewards/rejected": -0.08019973337650299, - "step": 1790 - }, - { - "epoch": 1.8877818563188253, - "grad_norm": 2.077913541951449, - "learning_rate": 4.714045207910318e-06, - "log_odds_chosen": 2.1426799297332764, - "log_odds_ratio": -0.18838170170783997, - "logits/chosen": -2.950552463531494, - "logits/rejected": -2.9804420471191406, - "logps/chosen": -0.41320332884788513, - "logps/rejected": -1.622671365737915, - "loss": 0.2717, - "nll_loss": 0.2544669210910797, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.020660167559981346, - "rewards/margins": 0.060473401099443436, - "rewards/rejected": -0.08113356679677963, - "step": 1800 - }, - { - "epoch": 1.8877818563188253, - "eval_log_odds_chosen": 0.48237088322639465, - "eval_log_odds_ratio": -0.6183955669403076, - "eval_logits/chosen": -2.9562783241271973, - "eval_logits/rejected": -2.957892894744873, - "eval_logps/chosen": -0.8997318148612976, - "eval_logps/rejected": -1.2325206995010376, - "eval_loss": 0.5340895652770996, - "eval_nll_loss": 0.5023403763771057, - "eval_rewards/accuracies": 0.6547619104385376, - "eval_rewards/chosen": -0.04498659446835518, - "eval_rewards/margins": 0.01663944497704506, - "eval_rewards/rejected": -0.06162603944540024, - "eval_runtime": 136.1464, - "eval_samples_per_second": 14.646, - "eval_steps_per_second": 0.463, - "step": 1800 - }, - { - "epoch": 1.8982695332983743, - "grad_norm": 1.896252578291677, - "learning_rate": 4.701004947222685e-06, - "log_odds_chosen": 2.0811541080474854, - "log_odds_ratio": -0.20500631630420685, - "logits/chosen": -3.000387668609619, - "logits/rejected": -2.983591079711914, - "logps/chosen": -0.4098430573940277, - "logps/rejected": -1.608665108680725, - "loss": 0.2794, - "nll_loss": 0.25453388690948486, - "rewards/accuracies": 0.9437500238418579, - "rewards/chosen": -0.020492153242230415, - "rewards/margins": 0.05994110181927681, - "rewards/rejected": -0.08043324947357178, - "step": 1810 - }, - { - "epoch": 1.9087572102779236, - "grad_norm": 2.019085371673625, - "learning_rate": 4.688072309384955e-06, - "log_odds_chosen": 2.0144619941711426, - "log_odds_ratio": -0.2020682841539383, - "logits/chosen": -2.9534127712249756, - "logits/rejected": -2.9533755779266357, - "logps/chosen": -0.3999931216239929, - "logps/rejected": -1.4992988109588623, - "loss": 0.2775, - "nll_loss": 0.26274845004081726, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.019999656826257706, - "rewards/margins": 0.054965294897556305, - "rewards/rejected": -0.07496494799852371, - "step": 1820 - }, - { - "epoch": 1.9192448872574723, - "grad_norm": 1.9263871107241788, - "learning_rate": 4.675245822121844e-06, - "log_odds_chosen": 2.0367493629455566, - "log_odds_ratio": -0.20607483386993408, - "logits/chosen": -2.9868836402893066, - "logits/rejected": -3.000213861465454, - "logps/chosen": -0.4244080185890198, - "logps/rejected": -1.5761488676071167, - "loss": 0.2923, - "nll_loss": 0.2808459997177124, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02122039906680584, - "rewards/margins": 0.057587046176195145, - "rewards/rejected": -0.07880743592977524, - "step": 1830 - }, - { - "epoch": 1.9297325642370216, - "grad_norm": 2.1487838733941365, - "learning_rate": 4.662524041201569e-06, - "log_odds_chosen": 2.0472216606140137, - "log_odds_ratio": -0.22086529433727264, - "logits/chosen": -2.9925904273986816, - "logits/rejected": -2.985816240310669, - "logps/chosen": -0.4373515248298645, - "logps/rejected": -1.5831472873687744, - "loss": 0.2713, - "nll_loss": 0.2551635801792145, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.021867576986551285, - "rewards/margins": 0.057289790362119675, - "rewards/rejected": -0.07915736734867096, - "step": 1840 - }, - { - "epoch": 1.9402202412165706, - "grad_norm": 2.0463386352717112, - "learning_rate": 4.649905549752772e-06, - "log_odds_chosen": 2.1467113494873047, - "log_odds_ratio": -0.21497011184692383, - "logits/chosen": -2.938457727432251, - "logits/rejected": -2.9367523193359375, - "logps/chosen": -0.4192470610141754, - "logps/rejected": -1.63271164894104, - "loss": 0.2767, - "nll_loss": 0.2981775999069214, - "rewards/accuracies": 0.9312499761581421, - "rewards/chosen": -0.02096235193312168, - "rewards/margins": 0.06067322567105293, - "rewards/rejected": -0.08163557946681976, - "step": 1850 - }, - { - "epoch": 1.9507079181961196, - "grad_norm": 1.9930187660935812, - "learning_rate": 4.6373889576016826e-06, - "log_odds_chosen": 2.145296573638916, - "log_odds_ratio": -0.19072812795639038, - "logits/chosen": -2.9529764652252197, - "logits/rejected": -2.960404634475708, - "logps/chosen": -0.407731294631958, - "logps/rejected": -1.5777407884597778, - "loss": 0.2761, - "nll_loss": 0.2852553129196167, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.02038656547665596, - "rewards/margins": 0.05850047990679741, - "rewards/rejected": -0.07888703793287277, - "step": 1860 - }, - { - "epoch": 1.9611955951756685, - "grad_norm": 2.0042665222271756, - "learning_rate": 4.624972900628803e-06, - "log_odds_chosen": 2.0522494316101074, - "log_odds_ratio": -0.20059652626514435, - "logits/chosen": -2.932502269744873, - "logits/rejected": -2.9307363033294678, - "logps/chosen": -0.4203645586967468, - "logps/rejected": -1.5539976358413696, - "loss": 0.276, - "nll_loss": 0.2738272547721863, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02101822756230831, - "rewards/margins": 0.05668165162205696, - "rewards/rejected": -0.07769988477230072, - "step": 1870 - }, - { - "epoch": 1.9716832721552175, - "grad_norm": 2.0226547316915258, - "learning_rate": 4.6126560401444256e-06, - "log_odds_chosen": 2.0710301399230957, - "log_odds_ratio": -0.19392071664333344, - "logits/chosen": -3.015066623687744, - "logits/rejected": -2.99493145942688, - "logps/chosen": -0.43072837591171265, - "logps/rejected": -1.6065874099731445, - "loss": 0.2748, - "nll_loss": 0.2821330428123474, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.02153642103075981, - "rewards/margins": 0.05879295617341995, - "rewards/rejected": -0.08032937347888947, - "step": 1880 - }, - { - "epoch": 1.9821709491347668, - "grad_norm": 2.567857697275732, - "learning_rate": 4.600437062282362e-06, - "log_odds_chosen": 1.9227994680404663, - "log_odds_ratio": -0.2224545031785965, - "logits/chosen": -3.0251965522766113, - "logits/rejected": -2.993910789489746, - "logps/chosen": -0.4456098675727844, - "logps/rejected": -1.529626488685608, - "loss": 0.2788, - "nll_loss": 0.28787270188331604, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.0222804956138134, - "rewards/margins": 0.054200828075408936, - "rewards/rejected": -0.07648131996393204, - "step": 1890 - }, - { - "epoch": 1.9926586261143155, - "grad_norm": 2.1545883447921654, - "learning_rate": 4.588314677411235e-06, - "log_odds_chosen": 2.2162415981292725, - "log_odds_ratio": -0.20383968949317932, - "logits/chosen": -3.039658784866333, - "logits/rejected": -3.022245407104492, - "logps/chosen": -0.420427143573761, - "logps/rejected": -1.6983455419540405, - "loss": 0.2857, - "nll_loss": 0.24534273147583008, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": -0.02102135680615902, - "rewards/margins": 0.06389592587947845, - "rewards/rejected": -0.08491728454828262, - "step": 1900 - }, - { - "epoch": 1.9926586261143155, - "eval_log_odds_chosen": 0.48923251032829285, - "eval_log_odds_ratio": -0.6193312406539917, - "eval_logits/chosen": -3.0350046157836914, - "eval_logits/rejected": -3.0279133319854736, - "eval_logps/chosen": -0.908783495426178, - "eval_logps/rejected": -1.2409300804138184, - "eval_loss": 0.5407980680465698, - "eval_nll_loss": 0.5090586543083191, - "eval_rewards/accuracies": 0.6547619104385376, - "eval_rewards/chosen": -0.04543917626142502, - "eval_rewards/margins": 0.016607332974672318, - "eval_rewards/rejected": -0.062046512961387634, - "eval_runtime": 137.1653, - "eval_samples_per_second": 14.537, - "eval_steps_per_second": 0.459, - "step": 1900 - }, - { - "epoch": 2.0031463030938648, - "grad_norm": 2.4971175632899385, - "learning_rate": 4.576287619562756e-06, - "log_odds_chosen": 2.549215793609619, - "log_odds_ratio": -0.13884183764457703, - "logits/chosen": -3.0293986797332764, - "logits/rejected": -3.0052542686462402, - "logps/chosen": -0.3389069139957428, - "logps/rejected": -1.6784775257110596, - "loss": 0.2535, - "nll_loss": 0.2399848997592926, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.01694534718990326, - "rewards/margins": 0.06697852909564972, - "rewards/rejected": -0.08392388373613358, - "step": 1910 - }, - { - "epoch": 2.0136339800734135, - "grad_norm": 2.5031224034871475, - "learning_rate": 4.564354645876385e-06, - "log_odds_chosen": 4.333657741546631, - "log_odds_ratio": -0.02762582339346409, - "logits/chosen": -2.869049549102783, - "logits/rejected": -2.8186068534851074, - "logps/chosen": -0.1433320939540863, - "logps/rejected": -2.334181547164917, - "loss": 0.1236, - "nll_loss": 0.11940746009349823, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007166605442762375, - "rewards/margins": 0.10954247415065765, - "rewards/rejected": -0.11670909076929092, - "step": 1920 - }, - { - "epoch": 2.0241216570529628, - "grad_norm": 1.9586057770651872, - "learning_rate": 4.552514536059854e-06, - "log_odds_chosen": 3.8062407970428467, - "log_odds_ratio": -0.0499381422996521, - "logits/chosen": -2.9369876384735107, - "logits/rejected": -2.963967800140381, - "logps/chosen": -0.1607118844985962, - "logps/rejected": -1.9827187061309814, - "loss": 0.116, - "nll_loss": 0.11325522512197495, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.00803559459745884, - "rewards/margins": 0.09110033512115479, - "rewards/rejected": -0.09913593530654907, - "step": 1930 - }, - { - "epoch": 2.034609334032512, - "grad_norm": 2.173705177159571, - "learning_rate": 4.540766091864998e-06, - "log_odds_chosen": 3.9211831092834473, - "log_odds_ratio": -0.03853369504213333, - "logits/chosen": -2.848071575164795, - "logits/rejected": -2.927175760269165, - "logps/chosen": -0.14356736838817596, - "logps/rejected": -1.959979772567749, - "loss": 0.1167, - "nll_loss": 0.11882974952459335, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007178368978202343, - "rewards/margins": 0.09082063287496567, - "rewards/rejected": -0.09799900650978088, - "step": 1940 - }, - { - "epoch": 2.0450970110120608, - "grad_norm": 1.7557144572827617, - "learning_rate": 4.529108136578383e-06, - "log_odds_chosen": 4.060091495513916, - "log_odds_ratio": -0.028795290738344193, - "logits/chosen": -2.8138527870178223, - "logits/rejected": -2.8606162071228027, - "logps/chosen": -0.13301293551921844, - "logps/rejected": -2.0062737464904785, - "loss": 0.1151, - "nll_loss": 0.1191815584897995, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006650646682828665, - "rewards/margins": 0.09366302937269211, - "rewards/rejected": -0.10031367838382721, - "step": 1950 - }, - { - "epoch": 2.05558468799161, - "grad_norm": 1.69960315567237, - "learning_rate": 4.517539514526257e-06, - "log_odds_chosen": 4.352217674255371, - "log_odds_ratio": -0.03757786005735397, - "logits/chosen": -2.819655656814575, - "logits/rejected": -2.8428378105163574, - "logps/chosen": -0.14081783592700958, - "logps/rejected": -2.33030104637146, - "loss": 0.1135, - "nll_loss": 0.11204487085342407, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007040892727673054, - "rewards/margins": 0.10947415977716446, - "rewards/rejected": -0.11651506274938583, - "step": 1960 - }, - { - "epoch": 2.0660723649711588, - "grad_norm": 1.991621297994473, - "learning_rate": 4.506059090593329e-06, - "log_odds_chosen": 4.156961917877197, - "log_odds_ratio": -0.0386335626244545, - "logits/chosen": -2.8222968578338623, - "logits/rejected": -2.880376100540161, - "logps/chosen": -0.15631213784217834, - "logps/rejected": -2.2803502082824707, - "loss": 0.1083, - "nll_loss": 0.11318318545818329, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007815606892108917, - "rewards/margins": 0.1062019094824791, - "rewards/rejected": -0.11401752382516861, - "step": 1970 - }, - { - "epoch": 2.076560041950708, - "grad_norm": 1.8671392728507943, - "learning_rate": 4.4946657497549474e-06, - "log_odds_chosen": 4.751786708831787, - "log_odds_ratio": -0.02287628874182701, - "logits/chosen": -2.8250374794006348, - "logits/rejected": -2.858389377593994, - "logps/chosen": -0.136850968003273, - "logps/rejected": -2.61843204498291, - "loss": 0.1149, - "nll_loss": 0.11261866241693497, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006842548493295908, - "rewards/margins": 0.12407903373241425, - "rewards/rejected": -0.13092158734798431, - "step": 1980 - }, - { - "epoch": 2.0870477189302568, - "grad_norm": 2.047221073846021, - "learning_rate": 4.483358396622204e-06, - "log_odds_chosen": 4.551729202270508, - "log_odds_ratio": -0.029045408591628075, - "logits/chosen": -2.8212010860443115, - "logits/rejected": -2.863682270050049, - "logps/chosen": -0.13936151564121246, - "logps/rejected": -2.4473021030426025, - "loss": 0.1129, - "nll_loss": 0.11166741698980331, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006968076340854168, - "rewards/margins": 0.11539702117443085, - "rewards/rejected": -0.12236510217189789, - "step": 1990 - }, - { - "epoch": 2.097535395909806, - "grad_norm": 2.1099833794179723, - "learning_rate": 4.47213595499958e-06, - "log_odds_chosen": 4.558366298675537, - "log_odds_ratio": -0.01906474307179451, - "logits/chosen": -2.8424153327941895, - "logits/rejected": -2.877136707305908, - "logps/chosen": -0.14121726155281067, - "logps/rejected": -2.4738833904266357, - "loss": 0.1137, - "nll_loss": 0.1110328808426857, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0070608630776405334, - "rewards/margins": 0.11663329601287842, - "rewards/rejected": -0.12369415909051895, - "step": 2000 - }, - { - "epoch": 2.097535395909806, - "eval_log_odds_chosen": 0.5767443776130676, - "eval_log_odds_ratio": -0.6272528171539307, - "eval_logits/chosen": -2.87036395072937, - "eval_logits/rejected": -2.881497383117676, - "eval_logps/chosen": -1.2408413887023926, - "eval_logps/rejected": -1.6761136054992676, - "eval_loss": 0.6877180337905884, - "eval_nll_loss": 0.6538823843002319, - "eval_rewards/accuracies": 0.670634925365448, - "eval_rewards/chosen": -0.06204206869006157, - "eval_rewards/margins": 0.021763615310192108, - "eval_rewards/rejected": -0.08380568027496338, - "eval_runtime": 137.068, - "eval_samples_per_second": 14.548, - "eval_steps_per_second": 0.46, - "step": 2000 - }, - { - "epoch": 2.108023072889355, - "grad_norm": 1.7758830781899906, - "learning_rate": 4.4609973674547055e-06, - "log_odds_chosen": 4.593904495239258, - "log_odds_ratio": -0.033291045576334, - "logits/chosen": -2.856330394744873, - "logits/rejected": -2.8690733909606934, - "logps/chosen": -0.1400183141231537, - "logps/rejected": -2.536652088165283, - "loss": 0.1039, - "nll_loss": 0.10139288008213043, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007000915706157684, - "rewards/margins": 0.11983168125152588, - "rewards/rejected": -0.12683258950710297, - "step": 2010 - }, - { - "epoch": 2.118510749868904, - "grad_norm": 2.6416736862275076, - "learning_rate": 4.449941594899848e-06, - "log_odds_chosen": 4.607335090637207, - "log_odds_ratio": -0.028559138998389244, - "logits/chosen": -2.7992746829986572, - "logits/rejected": -2.8301546573638916, - "logps/chosen": -0.14062660932540894, - "logps/rejected": -2.5437684059143066, - "loss": 0.1201, - "nll_loss": 0.1216670423746109, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007031330373138189, - "rewards/margins": 0.12015708535909653, - "rewards/rejected": -0.12718841433525085, - "step": 2020 - }, - { - "epoch": 2.128998426848453, - "grad_norm": 2.094070218470564, - "learning_rate": 4.438967616184754e-06, - "log_odds_chosen": 4.340805530548096, - "log_odds_ratio": -0.027936171740293503, - "logits/chosen": -2.823608875274658, - "logits/rejected": -2.8253750801086426, - "logps/chosen": -0.13957419991493225, - "logps/rejected": -2.268900156021118, - "loss": 0.1108, - "nll_loss": 0.1126783937215805, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006978710647672415, - "rewards/margins": 0.10646629333496094, - "rewards/rejected": -0.11344502121210098, - "step": 2030 - }, - { - "epoch": 2.139486103828002, - "grad_norm": 2.222098137194295, - "learning_rate": 4.428074427700477e-06, - "log_odds_chosen": 4.698141098022461, - "log_odds_ratio": -0.02707051672041416, - "logits/chosen": -2.8169960975646973, - "logits/rejected": -2.8297157287597656, - "logps/chosen": -0.1413937509059906, - "logps/rejected": -2.65130877494812, - "loss": 0.1166, - "nll_loss": 0.11614535748958588, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007069687359035015, - "rewards/margins": 0.1254957616329193, - "rewards/rejected": -0.1325654536485672, - "step": 2040 - }, - { - "epoch": 2.149973780807551, - "grad_norm": 2.1988466339750317, - "learning_rate": 4.417261042993862e-06, - "log_odds_chosen": 4.824273109436035, - "log_odds_ratio": -0.022720973938703537, - "logits/chosen": -2.8039610385894775, - "logits/rejected": -2.795748710632324, - "logps/chosen": -0.12069626152515411, - "logps/rejected": -2.613525390625, - "loss": 0.1113, - "nll_loss": 0.10357411205768585, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006034812889993191, - "rewards/margins": 0.12464147806167603, - "rewards/rejected": -0.1306762993335724, - "step": 2050 - }, - { - "epoch": 2.1604614577871, - "grad_norm": 1.9312492998690272, - "learning_rate": 4.406526492392318e-06, - "log_odds_chosen": 4.532221794128418, - "log_odds_ratio": -0.025564473122358322, - "logits/chosen": -2.856283664703369, - "logits/rejected": -2.847923994064331, - "logps/chosen": -0.15458881855010986, - "logps/rejected": -2.556361198425293, - "loss": 0.1171, - "nll_loss": 0.1105358749628067, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007729442324489355, - "rewards/margins": 0.1200886145234108, - "rewards/rejected": -0.1278180480003357, - "step": 2060 - }, - { - "epoch": 2.170949134766649, - "grad_norm": 2.184212774032157, - "learning_rate": 4.39586982263858e-06, - "log_odds_chosen": 4.760067462921143, - "log_odds_ratio": -0.025417357683181763, - "logits/chosen": -2.8176796436309814, - "logits/rejected": -2.818103313446045, - "logps/chosen": -0.15180301666259766, - "logps/rejected": -2.774660110473633, - "loss": 0.1148, - "nll_loss": 0.11588319391012192, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007590149994939566, - "rewards/margins": 0.13114285469055176, - "rewards/rejected": -0.13873299956321716, - "step": 2070 - }, - { - "epoch": 2.1814368117461984, - "grad_norm": 2.151555777196694, - "learning_rate": 4.385290096535147e-06, - "log_odds_chosen": 4.732907772064209, - "log_odds_ratio": -0.026212304830551147, - "logits/chosen": -2.859835147857666, - "logits/rejected": -2.857645034790039, - "logps/chosen": -0.13824030756950378, - "logps/rejected": -2.6506001949310303, - "loss": 0.1132, - "nll_loss": 0.11115143448114395, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.006912014447152615, - "rewards/margins": 0.12561801075935364, - "rewards/rejected": -0.13253000378608704, - "step": 2080 - }, - { - "epoch": 2.191924488725747, - "grad_norm": 3.2431795321399486, - "learning_rate": 4.374786392598072e-06, - "log_odds_chosen": 4.578325271606445, - "log_odds_ratio": -0.03994257375597954, - "logits/chosen": -2.8212687969207764, - "logits/rejected": -2.7516632080078125, - "logps/chosen": -0.1504596322774887, - "logps/rejected": -2.5710039138793945, - "loss": 0.1095, - "nll_loss": 0.10720662772655487, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007522981613874435, - "rewards/margins": 0.12102720886468887, - "rewards/rejected": -0.128550186753273, - "step": 2090 - }, - { - "epoch": 2.2024121657052964, - "grad_norm": 2.6693753745610076, - "learning_rate": 4.364357804719848e-06, - "log_odds_chosen": 4.707537651062012, - "log_odds_ratio": -0.025204619392752647, - "logits/chosen": -2.798999309539795, - "logits/rejected": -2.794037342071533, - "logps/chosen": -0.15521793067455292, - "logps/rejected": -2.689946174621582, - "loss": 0.1192, - "nll_loss": 0.12550954520702362, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007760896347463131, - "rewards/margins": 0.12673643231391907, - "rewards/rejected": -0.13449731469154358, - "step": 2100 - }, - { - "epoch": 2.2024121657052964, - "eval_log_odds_chosen": 0.6958096623420715, - "eval_log_odds_ratio": -0.6209548115730286, - "eval_logits/chosen": -2.837247610092163, - "eval_logits/rejected": -2.8433148860931396, - "eval_logps/chosen": -1.4121639728546143, - "eval_logps/rejected": -1.9619879722595215, - "eval_loss": 0.7576995491981506, - "eval_nll_loss": 0.7199162244796753, - "eval_rewards/accuracies": 0.6726190447807312, - "eval_rewards/chosen": -0.07060819864273071, - "eval_rewards/margins": 0.027491191402077675, - "eval_rewards/rejected": -0.09809939563274384, - "eval_runtime": 136.9058, - "eval_samples_per_second": 14.565, - "eval_steps_per_second": 0.46, - "step": 2100 - }, - { - "epoch": 2.212899842684845, - "grad_norm": 1.7712476287108132, - "learning_rate": 4.354003441841081e-06, - "log_odds_chosen": 4.905824184417725, - "log_odds_ratio": -0.02992095984518528, - "logits/chosen": -2.8259618282318115, - "logits/rejected": -2.760521650314331, - "logps/chosen": -0.13811610639095306, - "logps/rejected": -2.7983617782592773, - "loss": 0.1173, - "nll_loss": 0.12010955810546875, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.006905805319547653, - "rewards/margins": 0.13301227986812592, - "rewards/rejected": -0.13991808891296387, - "step": 2110 - }, - { - "epoch": 2.2233875196643944, - "grad_norm": 1.6446106852737563, - "learning_rate": 4.3437224276306945e-06, - "log_odds_chosen": 4.906925201416016, - "log_odds_ratio": -0.017224887385964394, - "logits/chosen": -2.838736057281494, - "logits/rejected": -2.8536746501922607, - "logps/chosen": -0.16129423677921295, - "logps/rejected": -2.8627591133117676, - "loss": 0.1147, - "nll_loss": 0.12654295563697815, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008064712397754192, - "rewards/margins": 0.1350732445716858, - "rewards/rejected": -0.14313796162605286, - "step": 2120 - }, - { - "epoch": 2.233875196643943, - "grad_norm": 1.7769911595186116, - "learning_rate": 4.333513900174396e-06, - "log_odds_chosen": 4.821990966796875, - "log_odds_ratio": -0.026227790862321854, - "logits/chosen": -2.829463481903076, - "logits/rejected": -2.842454433441162, - "logps/chosen": -0.1390562653541565, - "logps/rejected": -2.760815143585205, - "loss": 0.1215, - "nll_loss": 0.11114709079265594, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.0069528138265013695, - "rewards/margins": 0.13108794391155243, - "rewards/rejected": -0.13804076611995697, - "step": 2130 - }, - { - "epoch": 2.2443628736234924, - "grad_norm": 2.186831361943043, - "learning_rate": 4.32337701167117e-06, - "log_odds_chosen": 5.350895881652832, - "log_odds_ratio": -0.0246684979647398, - "logits/chosen": -2.872166156768799, - "logits/rejected": -2.8550028800964355, - "logps/chosen": -0.13888207077980042, - "logps/rejected": -3.2091636657714844, - "loss": 0.1143, - "nll_loss": 0.11629905551671982, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.006944102700799704, - "rewards/margins": 0.1535140872001648, - "rewards/rejected": -0.16045819222927094, - "step": 2140 - }, - { - "epoch": 2.2548505506030416, - "grad_norm": 2.2764409350931345, - "learning_rate": 4.313310928137537e-06, - "log_odds_chosen": 4.80722713470459, - "log_odds_ratio": -0.025547053664922714, - "logits/chosen": -2.8291611671447754, - "logits/rejected": -2.858245849609375, - "logps/chosen": -0.15937599539756775, - "logps/rejected": -2.8679497241973877, - "loss": 0.1185, - "nll_loss": 0.11574534326791763, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007968800142407417, - "rewards/margins": 0.13542868196964264, - "rewards/rejected": -0.1433974802494049, - "step": 2150 - }, - { - "epoch": 2.2653382275825904, - "grad_norm": 2.239980255447614, - "learning_rate": 4.303314829119352e-06, - "log_odds_chosen": 5.589659690856934, - "log_odds_ratio": -0.020419184118509293, - "logits/chosen": -2.905287981033325, - "logits/rejected": -2.966031551361084, - "logps/chosen": -0.1542571783065796, - "logps/rejected": -3.551201581954956, - "loss": 0.1236, - "nll_loss": 0.11697031557559967, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007712858729064465, - "rewards/margins": 0.16984722018241882, - "rewards/rejected": -0.17756007611751556, - "step": 2160 - }, - { - "epoch": 2.2758259045621396, - "grad_norm": 2.009942820215124, - "learning_rate": 4.293387907410919e-06, - "log_odds_chosen": 6.170254707336426, - "log_odds_ratio": -0.017188329249620438, - "logits/chosen": -2.848698139190674, - "logits/rejected": -2.945160388946533, - "logps/chosen": -0.13800857961177826, - "logps/rejected": -4.000069618225098, - "loss": 0.1137, - "nll_loss": 0.11105845123529434, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.006900429725646973, - "rewards/margins": 0.19310306012630463, - "rewards/rejected": -0.2000034749507904, - "step": 2170 - }, - { - "epoch": 2.2863135815416884, - "grad_norm": 2.1918079846574567, - "learning_rate": 4.2835293687811935e-06, - "log_odds_chosen": 6.479376316070557, - "log_odds_ratio": -0.010083029977977276, - "logits/chosen": -2.7919399738311768, - "logits/rejected": -2.9110770225524902, - "logps/chosen": -0.1471458077430725, - "logps/rejected": -4.402917385101318, - "loss": 0.1149, - "nll_loss": 0.12062163650989532, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007357291877269745, - "rewards/margins": 0.21278861165046692, - "rewards/rejected": -0.22014589607715607, - "step": 2180 - }, - { - "epoch": 2.2968012585212376, - "grad_norm": 1.9268306821517742, - "learning_rate": 4.273738431706883e-06, - "log_odds_chosen": 6.724373817443848, - "log_odds_ratio": -0.018149670213460922, - "logits/chosen": -2.891892194747925, - "logits/rejected": -3.004826784133911, - "logps/chosen": -0.15707895159721375, - "logps/rejected": -4.773315906524658, - "loss": 0.1119, - "nll_loss": 0.10733366012573242, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007853945717215538, - "rewards/margins": 0.23081183433532715, - "rewards/rejected": -0.23866574466228485, - "step": 2190 - }, - { - "epoch": 2.3072889355007864, - "grad_norm": 1.9131867908425575, - "learning_rate": 4.264014327112208e-06, - "log_odds_chosen": 6.2542595863342285, - "log_odds_ratio": -0.015775460749864578, - "logits/chosen": -2.862001419067383, - "logits/rejected": -2.91827654838562, - "logps/chosen": -0.14461472630500793, - "logps/rejected": -4.159193515777588, - "loss": 0.1178, - "nll_loss": 0.12322264909744263, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007230737246572971, - "rewards/margins": 0.20072893798351288, - "rewards/rejected": -0.20795968174934387, - "step": 2200 - }, - { - "epoch": 2.3072889355007864, - "eval_log_odds_chosen": 1.1627599000930786, - "eval_log_odds_ratio": -0.7777736783027649, - "eval_logits/chosen": -2.887819766998291, - "eval_logits/rejected": -2.9106638431549072, - "eval_logps/chosen": -2.4108457565307617, - "eval_logps/rejected": -3.4342026710510254, - "eval_loss": 1.1761772632598877, - "eval_nll_loss": 1.1196904182434082, - "eval_rewards/accuracies": 0.6527777910232544, - "eval_rewards/chosen": -0.12054230272769928, - "eval_rewards/margins": 0.051167842000722885, - "eval_rewards/rejected": -0.17171014845371246, - "eval_runtime": 137.1423, - "eval_samples_per_second": 14.54, - "eval_steps_per_second": 0.459, - "step": 2200 - }, - { - "epoch": 2.3177766124803356, - "grad_norm": 2.1121501905853624, - "learning_rate": 4.254356298115171e-06, - "log_odds_chosen": 6.363844394683838, - "log_odds_ratio": -0.024754000827670097, - "logits/chosen": -2.8908374309539795, - "logits/rejected": -2.9566292762756348, - "logps/chosen": -0.15381646156311035, - "logps/rejected": -4.287047386169434, - "loss": 0.1181, - "nll_loss": 0.12711365520954132, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007690823636949062, - "rewards/margins": 0.20666155219078064, - "rewards/rejected": -0.21435236930847168, - "step": 2210 - }, - { - "epoch": 2.3282642894598844, - "grad_norm": 3.84884286912148, - "learning_rate": 4.24476359978009e-06, - "log_odds_chosen": 5.530186176300049, - "log_odds_ratio": -0.017865758389234543, - "logits/chosen": -2.8787178993225098, - "logits/rejected": -2.9533944129943848, - "logps/chosen": -0.1436866670846939, - "logps/rejected": -3.488823652267456, - "loss": 0.1234, - "nll_loss": 0.11815366894006729, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0071843331679701805, - "rewards/margins": 0.16725686192512512, - "rewards/rejected": -0.17444118857383728, - "step": 2220 - }, - { - "epoch": 2.3387519664394336, - "grad_norm": 2.417106329176298, - "learning_rate": 4.235235498876268e-06, - "log_odds_chosen": 5.049867630004883, - "log_odds_ratio": -0.030804011970758438, - "logits/chosen": -2.8601975440979004, - "logits/rejected": -2.919813632965088, - "logps/chosen": -0.16016361117362976, - "logps/rejected": -3.108591079711914, - "loss": 0.1205, - "nll_loss": 0.12257065623998642, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.008008181117475033, - "rewards/margins": 0.14742138981819153, - "rewards/rejected": -0.15542957186698914, - "step": 2230 - }, - { - "epoch": 2.349239643418983, - "grad_norm": 2.0311020060176737, - "learning_rate": 4.2257712736425835e-06, - "log_odds_chosen": 6.287697792053223, - "log_odds_ratio": -0.03303173556923866, - "logits/chosen": -2.8431243896484375, - "logits/rejected": -2.987511396408081, - "logps/chosen": -0.15092086791992188, - "logps/rejected": -4.205324649810791, - "loss": 0.119, - "nll_loss": 0.11937984079122543, - "rewards/accuracies": 0.96875, - "rewards/chosen": -0.0075460439547896385, - "rewards/margins": 0.20272019505500793, - "rewards/rejected": -0.21026620268821716, - "step": 2240 - }, - { - "epoch": 2.3597273203985316, - "grad_norm": 1.8184108922544404, - "learning_rate": 4.216370213557839e-06, - "log_odds_chosen": 6.489804267883301, - "log_odds_ratio": -0.017738422378897667, - "logits/chosen": -2.8637566566467285, - "logits/rejected": -2.9882349967956543, - "logps/chosen": -0.1367037147283554, - "logps/rejected": -4.3643412590026855, - "loss": 0.1103, - "nll_loss": 0.10625318437814713, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006835184991359711, - "rewards/margins": 0.21138188242912292, - "rewards/rejected": -0.21821708977222443, - "step": 2250 - }, - { - "epoch": 2.370214997378081, - "grad_norm": 1.9927993897844196, - "learning_rate": 4.207031619116713e-06, - "log_odds_chosen": 6.5232744216918945, - "log_odds_ratio": -0.02112133800983429, - "logits/chosen": -2.888134002685547, - "logits/rejected": -2.9766697883605957, - "logps/chosen": -0.13985328376293182, - "logps/rejected": -4.443106174468994, - "loss": 0.1119, - "nll_loss": 0.10387493669986725, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.006992665119469166, - "rewards/margins": 0.21516263484954834, - "rewards/rejected": -0.22215530276298523, - "step": 2260 - }, - { - "epoch": 2.3807026743576296, - "grad_norm": 1.9179118979680037, - "learning_rate": 4.197754801611136e-06, - "log_odds_chosen": 7.000714302062988, - "log_odds_ratio": -0.01941884122788906, - "logits/chosen": -2.8880743980407715, - "logits/rejected": -3.0280842781066895, - "logps/chosen": -0.1594962626695633, - "logps/rejected": -4.991673946380615, - "loss": 0.1187, - "nll_loss": 0.12734182178974152, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007974812760949135, - "rewards/margins": 0.2416088581085205, - "rewards/rejected": -0.2495836764574051, - "step": 2270 - }, - { - "epoch": 2.391190351337179, - "grad_norm": 1.7656016453383905, - "learning_rate": 4.188539082916955e-06, - "log_odds_chosen": 5.81030797958374, - "log_odds_ratio": -0.02714763581752777, - "logits/chosen": -2.858682155609131, - "logits/rejected": -2.961153030395508, - "logps/chosen": -0.1495695412158966, - "logps/rejected": -3.7413382530212402, - "loss": 0.117, - "nll_loss": 0.1129683405160904, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007478476967662573, - "rewards/margins": 0.1795884370803833, - "rewards/rejected": -0.18706689774990082, - "step": 2280 - }, - { - "epoch": 2.401678028316728, - "grad_norm": 1.7721263332581463, - "learning_rate": 4.179383795285729e-06, - "log_odds_chosen": 6.099682807922363, - "log_odds_ratio": -0.016452614217996597, - "logits/chosen": -2.8671703338623047, - "logits/rejected": -2.94566011428833, - "logps/chosen": -0.1470957249403, - "logps/rejected": -4.025435447692871, - "loss": 0.1162, - "nll_loss": 0.1030157208442688, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007354786153882742, - "rewards/margins": 0.19391697645187378, - "rewards/rejected": -0.20127174258232117, - "step": 2290 - }, - { - "epoch": 2.412165705296277, - "grad_norm": 6.518126509500433, - "learning_rate": 4.170288281141496e-06, - "log_odds_chosen": 5.677874565124512, - "log_odds_ratio": -0.02623058296740055, - "logits/chosen": -2.8755476474761963, - "logits/rejected": -2.926180362701416, - "logps/chosen": -0.15929332375526428, - "logps/rejected": -3.627763271331787, - "loss": 0.1184, - "nll_loss": 0.12096776813268661, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007964666932821274, - "rewards/margins": 0.17342346906661987, - "rewards/rejected": -0.18138816952705383, - "step": 2300 - }, - { - "epoch": 2.412165705296277, - "eval_log_odds_chosen": 1.3232934474945068, - "eval_log_odds_ratio": -1.0561914443969727, - "eval_logits/chosen": -2.9102423191070557, - "eval_logits/rejected": -2.9226319789886475, - "eval_logps/chosen": -3.8695833683013916, - "eval_logps/rejected": -5.081162452697754, - "eval_loss": 1.8519541025161743, - "eval_nll_loss": 1.7541913986206055, - "eval_rewards/accuracies": 0.636904776096344, - "eval_rewards/chosen": -0.19347918033599854, - "eval_rewards/margins": 0.06057893857359886, - "eval_rewards/rejected": -0.2540581226348877, - "eval_runtime": 140.6912, - "eval_samples_per_second": 14.173, - "eval_steps_per_second": 0.448, - "step": 2300 - }, - { - "epoch": 2.422653382275826, - "grad_norm": 2.1350280555835317, - "learning_rate": 4.1612518928823956e-06, - "log_odds_chosen": 5.239171028137207, - "log_odds_ratio": -0.0356699600815773, - "logits/chosen": -2.8127808570861816, - "logits/rejected": -2.847365140914917, - "logps/chosen": -0.17353428900241852, - "logps/rejected": -3.4219677448272705, - "loss": 0.1197, - "nll_loss": 0.12273728847503662, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.008676714263856411, - "rewards/margins": 0.16242167353630066, - "rewards/rejected": -0.17109838128089905, - "step": 2310 - }, - { - "epoch": 2.433141059255375, - "grad_norm": 2.142764154815985, - "learning_rate": 4.1522739926869985e-06, - "log_odds_chosen": 7.10500431060791, - "log_odds_ratio": -0.02759629487991333, - "logits/chosen": -2.8841793537139893, - "logits/rejected": -2.979490280151367, - "logps/chosen": -0.15857262909412384, - "logps/rejected": -5.118218898773193, - "loss": 0.1179, - "nll_loss": 0.11995577812194824, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007928632199764252, - "rewards/margins": 0.24798233807086945, - "rewards/rejected": -0.2559109628200531, - "step": 2320 - }, - { - "epoch": 2.443628736234924, - "grad_norm": 2.442748493026814, - "learning_rate": 4.143353952325209e-06, - "log_odds_chosen": 6.4824538230896, - "log_odds_ratio": -0.03863966092467308, - "logits/chosen": -2.8798575401306152, - "logits/rejected": -2.975369691848755, - "logps/chosen": -0.16273298859596252, - "logps/rejected": -4.518317222595215, - "loss": 0.1144, - "nll_loss": 0.11924872547388077, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.008136649616062641, - "rewards/margins": 0.21777920424938202, - "rewards/rejected": -0.22591586410999298, - "step": 2330 - }, - { - "epoch": 2.454116413214473, - "grad_norm": 1.7906952084031593, - "learning_rate": 4.134491152973616e-06, - "log_odds_chosen": 6.330552101135254, - "log_odds_ratio": -0.019993215799331665, - "logits/chosen": -2.903748035430908, - "logits/rejected": -2.961629629135132, - "logps/chosen": -0.1506245732307434, - "logps/rejected": -4.29229736328125, - "loss": 0.1162, - "nll_loss": 0.11873211711645126, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.0075312284752726555, - "rewards/margins": 0.20708362758159637, - "rewards/rejected": -0.2146148979663849, - "step": 2340 - }, - { - "epoch": 2.464604090194022, - "grad_norm": 2.709543224621687, - "learning_rate": 4.125684985035174e-06, - "log_odds_chosen": 6.674917697906494, - "log_odds_ratio": -0.02191847935318947, - "logits/chosen": -2.869702100753784, - "logits/rejected": -2.9517292976379395, - "logps/chosen": -0.14587149024009705, - "logps/rejected": -4.594050407409668, - "loss": 0.1189, - "nll_loss": 0.11958177387714386, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007293573580682278, - "rewards/margins": 0.2224089354276657, - "rewards/rejected": -0.22970251739025116, - "step": 2350 - }, - { - "epoch": 2.475091767173571, - "grad_norm": 1.9596617726605967, - "learning_rate": 4.116934847963092e-06, - "log_odds_chosen": 6.008196830749512, - "log_odds_ratio": -0.020748203620314598, - "logits/chosen": -2.859504222869873, - "logits/rejected": -2.9086391925811768, - "logps/chosen": -0.1603454202413559, - "logps/rejected": -4.055342674255371, - "loss": 0.1137, - "nll_loss": 0.11717329174280167, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008017271757125854, - "rewards/margins": 0.1947498619556427, - "rewards/rejected": -0.20276716351509094, - "step": 2360 - }, - { - "epoch": 2.48557944415312, - "grad_norm": 25.11227763431921, - "learning_rate": 4.1082401500888055e-06, - "log_odds_chosen": 6.279742240905762, - "log_odds_ratio": -0.01569024845957756, - "logits/chosen": -2.916944742202759, - "logits/rejected": -2.987224578857422, - "logps/chosen": -0.14050395786762238, - "logps/rejected": -4.152866363525391, - "loss": 0.1189, - "nll_loss": 0.10722777992486954, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007025198079645634, - "rewards/margins": 0.20061811804771423, - "rewards/rejected": -0.2076433151960373, - "step": 2370 - }, - { - "epoch": 2.4960671211326693, - "grad_norm": 1.757332945919827, - "learning_rate": 4.099600308453939e-06, - "log_odds_chosen": 6.39632511138916, - "log_odds_ratio": -0.023090779781341553, - "logits/chosen": -2.8743884563446045, - "logits/rejected": -2.9668736457824707, - "logps/chosen": -0.15729930996894836, - "logps/rejected": -4.314006328582764, - "loss": 0.1177, - "nll_loss": 0.1209021583199501, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007864965125918388, - "rewards/margins": 0.2078353613615036, - "rewards/rejected": -0.21570034325122833, - "step": 2380 - }, - { - "epoch": 2.506554798112218, - "grad_norm": 2.0524680636282056, - "learning_rate": 4.091014748646132e-06, - "log_odds_chosen": 5.9223713874816895, - "log_odds_ratio": -0.030582841485738754, - "logits/chosen": -2.8992161750793457, - "logits/rejected": -2.929603099822998, - "logps/chosen": -0.1705484390258789, - "logps/rejected": -4.027953147888184, - "loss": 0.1189, - "nll_loss": 0.10802364349365234, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.008527422323822975, - "rewards/margins": 0.19287024438381195, - "rewards/rejected": -0.20139765739440918, - "step": 2390 - }, - { - "epoch": 2.5170424750917673, - "grad_norm": 1.7245638696745784, - "learning_rate": 4.082482904638631e-06, - "log_odds_chosen": 6.324474811553955, - "log_odds_ratio": -0.018949782475829124, - "logits/chosen": -2.8749866485595703, - "logits/rejected": -2.9224321842193604, - "logps/chosen": -0.1520567536354065, - "logps/rejected": -4.290619850158691, - "loss": 0.1172, - "nll_loss": 0.12284000217914581, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.00760283786803484, - "rewards/margins": 0.20692817866802216, - "rewards/rejected": -0.21453101933002472, - "step": 2400 - }, - { - "epoch": 2.5170424750917673, - "eval_log_odds_chosen": 1.0075438022613525, - "eval_log_odds_ratio": -0.8145382404327393, - "eval_logits/chosen": -2.8560779094696045, - "eval_logits/rejected": -2.871006965637207, - "eval_logps/chosen": -2.0024044513702393, - "eval_logps/rejected": -2.8670685291290283, - "eval_loss": 1.01926589012146, - "eval_nll_loss": 0.9735569357872009, - "eval_rewards/accuracies": 0.6408730149269104, - "eval_rewards/chosen": -0.10012022405862808, - "eval_rewards/margins": 0.043233200907707214, - "eval_rewards/rejected": -0.1433534324169159, - "eval_runtime": 138.4847, - "eval_samples_per_second": 14.399, - "eval_steps_per_second": 0.455, - "step": 2400 - }, - { - "epoch": 2.527530152071316, - "grad_norm": 2.140192470773612, - "learning_rate": 4.074004218633553e-06, - "log_odds_chosen": 6.169337272644043, - "log_odds_ratio": -0.024398522451519966, - "logits/chosen": -2.8802199363708496, - "logits/rejected": -2.9575634002685547, - "logps/chosen": -0.14228537678718567, - "logps/rejected": -4.140218257904053, - "loss": 0.1204, - "nll_loss": 0.10762319713830948, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.0071142688393592834, - "rewards/margins": 0.1998966485261917, - "rewards/rejected": -0.20701093971729279, - "step": 2410 - }, - { - "epoch": 2.5380178290508653, - "grad_norm": 1.9307036538867832, - "learning_rate": 4.065578140908709e-06, - "log_odds_chosen": 6.545037269592285, - "log_odds_ratio": -0.020819999277591705, - "logits/chosen": -2.826190948486328, - "logits/rejected": -2.9180386066436768, - "logps/chosen": -0.15343733131885529, - "logps/rejected": -4.550530433654785, - "loss": 0.1292, - "nll_loss": 0.12483732402324677, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007671866565942764, - "rewards/margins": 0.2198546677827835, - "rewards/rejected": -0.22752651572227478, - "step": 2420 - }, - { - "epoch": 2.5485055060304145, - "grad_norm": 2.472322893814309, - "learning_rate": 4.057204129667897e-06, - "log_odds_chosen": 6.510749816894531, - "log_odds_ratio": -0.017572391778230667, - "logits/chosen": -2.8476340770721436, - "logits/rejected": -2.9206082820892334, - "logps/chosen": -0.1623007208108902, - "logps/rejected": -4.547110557556152, - "loss": 0.114, - "nll_loss": 0.11619551479816437, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.00811503641307354, - "rewards/margins": 0.21924051642417908, - "rewards/rejected": -0.22735556960105896, - "step": 2430 - }, - { - "epoch": 2.5589931830099633, - "grad_norm": 3.562558849555077, - "learning_rate": 4.048881650894581e-06, - "log_odds_chosen": 7.486746311187744, - "log_odds_ratio": -0.012338453903794289, - "logits/chosen": -2.8392252922058105, - "logits/rejected": -2.924240827560425, - "logps/chosen": -0.15012109279632568, - "logps/rejected": -5.4815144538879395, - "loss": 0.1213, - "nll_loss": 0.12608163058757782, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007506055291742086, - "rewards/margins": 0.26656967401504517, - "rewards/rejected": -0.2740757167339325, - "step": 2440 - }, - { - "epoch": 2.5694808599895125, - "grad_norm": 2.3252293901649193, - "learning_rate": 4.040610178208843e-06, - "log_odds_chosen": 7.7740631103515625, - "log_odds_ratio": -0.0118449367582798, - "logits/chosen": -2.795551061630249, - "logits/rejected": -2.8945860862731934, - "logps/chosen": -0.1522868573665619, - "logps/rejected": -5.739714622497559, - "loss": 0.1145, - "nll_loss": 0.11489256471395493, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007614342961460352, - "rewards/margins": 0.27937138080596924, - "rewards/rejected": -0.28698569536209106, - "step": 2450 - }, - { - "epoch": 2.5799685369690613, - "grad_norm": 2.0157957603988175, - "learning_rate": 4.032389192727559e-06, - "log_odds_chosen": 6.265582084655762, - "log_odds_ratio": -0.024669019505381584, - "logits/chosen": -2.85023832321167, - "logits/rejected": -2.8876233100891113, - "logps/chosen": -0.150896817445755, - "logps/rejected": -4.219937324523926, - "loss": 0.1277, - "nll_loss": 0.12799417972564697, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007544840686023235, - "rewards/margins": 0.20345202088356018, - "rewards/rejected": -0.2109968364238739, - "step": 2460 - }, - { - "epoch": 2.5904562139486105, - "grad_norm": 2.287376161767263, - "learning_rate": 4.024218182927669e-06, - "log_odds_chosen": 6.810778617858887, - "log_odds_ratio": -0.013128559105098248, - "logits/chosen": -2.823387622833252, - "logits/rejected": -2.879467487335205, - "logps/chosen": -0.15397700667381287, - "logps/rejected": -4.72897481918335, - "loss": 0.1209, - "nll_loss": 0.12541964650154114, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.0076988511718809605, - "rewards/margins": 0.22874990105628967, - "rewards/rejected": -0.236448734998703, - "step": 2470 - }, - { - "epoch": 2.6009438909281593, - "grad_norm": 2.2717126345189547, - "learning_rate": 4.016096644512495e-06, - "log_odds_chosen": 6.199719429016113, - "log_odds_ratio": -0.018437180668115616, - "logits/chosen": -2.8248672485351562, - "logits/rejected": -2.8656277656555176, - "logps/chosen": -0.14331553876399994, - "logps/rejected": -4.071486949920654, - "loss": 0.1196, - "nll_loss": 0.11505875736474991, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.00716577610000968, - "rewards/margins": 0.19640859961509705, - "rewards/rejected": -0.20357437431812286, - "step": 2480 - }, - { - "epoch": 2.6114315679077085, - "grad_norm": 2.1379482021716036, - "learning_rate": 4.008024080281012e-06, - "log_odds_chosen": 7.395205497741699, - "log_odds_ratio": -0.01522077340632677, - "logits/chosen": -2.8720109462738037, - "logits/rejected": -2.936903476715088, - "logps/chosen": -0.13911715149879456, - "logps/rejected": -5.221936225891113, - "loss": 0.12, - "nll_loss": 0.12369368225336075, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.0069558583199977875, - "rewards/margins": 0.2541409730911255, - "rewards/rejected": -0.2610968351364136, - "step": 2490 - }, - { - "epoch": 2.6219192448872572, - "grad_norm": 1.7439578923515293, - "learning_rate": 4.000000000000001e-06, - "log_odds_chosen": 8.536567687988281, - "log_odds_ratio": -0.02061418630182743, - "logits/chosen": -2.854001760482788, - "logits/rejected": -2.9489758014678955, - "logps/chosen": -0.1588824838399887, - "logps/rejected": -6.567204475402832, - "loss": 0.1109, - "nll_loss": 0.11326327174901962, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007944123819470406, - "rewards/margins": 0.32041609287261963, - "rewards/rejected": -0.3283601999282837, - "step": 2500 - }, - { - "epoch": 2.6219192448872572, - "eval_log_odds_chosen": 1.0766297578811646, - "eval_log_odds_ratio": -0.9767945408821106, - "eval_logits/chosen": -2.8457064628601074, - "eval_logits/rejected": -2.857062339782715, - "eval_logps/chosen": -2.4182989597320557, - "eval_logps/rejected": -3.354691743850708, - "eval_loss": 1.2049823999404907, - "eval_nll_loss": 1.172393560409546, - "eval_rewards/accuracies": 0.6329365372657776, - "eval_rewards/chosen": -0.12091495096683502, - "eval_rewards/margins": 0.046819645911455154, - "eval_rewards/rejected": -0.1677345633506775, - "eval_runtime": 137.7801, - "eval_samples_per_second": 14.472, - "eval_steps_per_second": 0.457, - "step": 2500 - }, - { - "epoch": 2.6324069218668065, - "grad_norm": 3.8704567483353496, - "learning_rate": 3.992023920278996e-06, - "log_odds_chosen": 6.979190826416016, - "log_odds_ratio": -0.018384801223874092, - "logits/chosen": -2.8529200553894043, - "logits/rejected": -2.923466920852661, - "logps/chosen": -0.14472463726997375, - "logps/rejected": -4.871707916259766, - "loss": 0.1127, - "nll_loss": 0.1109754890203476, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007236232049763203, - "rewards/margins": 0.23634913563728333, - "rewards/rejected": -0.24358537793159485, - "step": 2510 - }, - { - "epoch": 2.6428945988463557, - "grad_norm": 2.0243407054263933, - "learning_rate": 3.984095364447979e-06, - "log_odds_chosen": 6.955283164978027, - "log_odds_ratio": -0.026280570775270462, - "logits/chosen": -2.845829486846924, - "logits/rejected": -2.9166336059570312, - "logps/chosen": -0.1561572551727295, - "logps/rejected": -4.968081474304199, - "loss": 0.1245, - "nll_loss": 0.11139287799596786, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.0078078629449009895, - "rewards/margins": 0.2405962496995926, - "rewards/rejected": -0.2484041005373001, - "step": 2520 - }, - { - "epoch": 2.6533822758259045, - "grad_norm": 2.159445384644007, - "learning_rate": 3.97621386243772e-06, - "log_odds_chosen": 8.654619216918945, - "log_odds_ratio": -0.015728970989584923, - "logits/chosen": -2.815493583679199, - "logits/rejected": -2.9511656761169434, - "logps/chosen": -0.1413796991109848, - "logps/rejected": -6.552220821380615, - "loss": 0.1201, - "nll_loss": 0.11258909851312637, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007068985607475042, - "rewards/margins": 0.32054203748703003, - "rewards/rejected": -0.3276110291481018, - "step": 2530 - }, - { - "epoch": 2.6638699528054537, - "grad_norm": 2.5062335927036123, - "learning_rate": 3.9683789506627254e-06, - "log_odds_chosen": 7.7274370193481445, - "log_odds_ratio": -0.020870521664619446, - "logits/chosen": -2.8319153785705566, - "logits/rejected": -2.922696113586426, - "logps/chosen": -0.15536390244960785, - "logps/rejected": -5.693093776702881, - "loss": 0.1181, - "nll_loss": 0.10906670987606049, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007768194191157818, - "rewards/margins": 0.2768864631652832, - "rewards/rejected": -0.2846546769142151, - "step": 2540 - }, - { - "epoch": 2.6743576297850025, - "grad_norm": 1.970994291017683, - "learning_rate": 3.960590171906698e-06, - "log_odds_chosen": 7.434384822845459, - "log_odds_ratio": -0.023785177618265152, - "logits/chosen": -2.7982025146484375, - "logits/rejected": -2.8931427001953125, - "logps/chosen": -0.16477976739406586, - "logps/rejected": -5.395650386810303, - "loss": 0.1221, - "nll_loss": 0.13674572110176086, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.008238988928496838, - "rewards/margins": 0.2615435719490051, - "rewards/rejected": -0.26978254318237305, - "step": 2550 - }, - { - "epoch": 2.6848453067645517, - "grad_norm": 2.0205686734736594, - "learning_rate": 3.952847075210474e-06, - "log_odds_chosen": 7.365771293640137, - "log_odds_ratio": -0.01570904441177845, - "logits/chosen": -2.866798162460327, - "logits/rejected": -2.959561347961426, - "logps/chosen": -0.14348378777503967, - "logps/rejected": -5.177813529968262, - "loss": 0.1204, - "nll_loss": 0.12037654966115952, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007174189202487469, - "rewards/margins": 0.2517164647579193, - "rewards/rejected": -0.25889068841934204, - "step": 2560 - }, - { - "epoch": 2.695332983744101, - "grad_norm": 1.8761709200806869, - "learning_rate": 3.9451492157623585e-06, - "log_odds_chosen": 8.670493125915527, - "log_odds_ratio": -0.011763294227421284, - "logits/chosen": -2.8013434410095215, - "logits/rejected": -2.920924425125122, - "logps/chosen": -0.16095298528671265, - "logps/rejected": -6.665195465087891, - "loss": 0.1166, - "nll_loss": 0.13346998393535614, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008047649636864662, - "rewards/margins": 0.32521215081214905, - "rewards/rejected": -0.3332597613334656, - "step": 2570 - }, - { - "epoch": 2.7058206607236497, - "grad_norm": 2.1285971867573408, - "learning_rate": 3.937496154790789e-06, - "log_odds_chosen": 7.294459342956543, - "log_odds_ratio": -0.018316376954317093, - "logits/chosen": -2.816880702972412, - "logits/rejected": -2.8812124729156494, - "logps/chosen": -0.13620439171791077, - "logps/rejected": -5.142992973327637, - "loss": 0.1195, - "nll_loss": 0.10606805980205536, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.0068102204240858555, - "rewards/margins": 0.25033941864967346, - "rewards/rejected": -0.2571496367454529, - "step": 2580 - }, - { - "epoch": 2.716308337703199, - "grad_norm": 2.400899470701997, - "learning_rate": 3.9298874594592975e-06, - "log_odds_chosen": 8.10938549041748, - "log_odds_ratio": -0.016252661123871803, - "logits/chosen": -2.807111978530884, - "logits/rejected": -2.915724515914917, - "logps/chosen": -0.15417781472206116, - "logps/rejected": -6.080683708190918, - "loss": 0.1163, - "nll_loss": 0.11585485935211182, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007708890829235315, - "rewards/margins": 0.2963252663612366, - "rewards/rejected": -0.30403420329093933, - "step": 2590 - }, - { - "epoch": 2.7267960146827477, - "grad_norm": 3.318597907364317, - "learning_rate": 3.922322702763682e-06, - "log_odds_chosen": 8.183881759643555, - "log_odds_ratio": -0.021557733416557312, - "logits/chosen": -2.8544585704803467, - "logits/rejected": -2.9738879203796387, - "logps/chosen": -0.14029571413993835, - "logps/rejected": -6.104724884033203, - "loss": 0.1238, - "nll_loss": 0.11269497871398926, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007014785893261433, - "rewards/margins": 0.2982214391231537, - "rewards/rejected": -0.30523625016212463, - "step": 2600 - }, - { - "epoch": 2.7267960146827477, - "eval_log_odds_chosen": 1.6673794984817505, - "eval_log_odds_ratio": -1.6934312582015991, - "eval_logits/chosen": -2.9804697036743164, - "eval_logits/rejected": -2.996739387512207, - "eval_logps/chosen": -6.072526454925537, - "eval_logps/rejected": -7.644432067871094, - "eval_loss": 2.6922054290771484, - "eval_nll_loss": 2.6498186588287354, - "eval_rewards/accuracies": 0.5873016119003296, - "eval_rewards/chosen": -0.30362632870674133, - "eval_rewards/margins": 0.07859525829553604, - "eval_rewards/rejected": -0.38222160935401917, - "eval_runtime": 136.8599, - "eval_samples_per_second": 14.57, - "eval_steps_per_second": 0.46, - "step": 2600 - }, - { - "epoch": 2.737283691662297, - "grad_norm": 2.23878079697452, - "learning_rate": 3.914801463431357e-06, - "log_odds_chosen": 7.083222389221191, - "log_odds_ratio": -0.02951228991150856, - "logits/chosen": -2.8593714237213135, - "logits/rejected": -2.9374592304229736, - "logps/chosen": -0.14687521755695343, - "logps/rejected": -5.056353569030762, - "loss": 0.1245, - "nll_loss": 0.11392644792795181, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007343760691583157, - "rewards/margins": 0.24547390639781952, - "rewards/rejected": -0.25281769037246704, - "step": 2610 - }, - { - "epoch": 2.7477713686418457, - "grad_norm": 3.0293992863459636, - "learning_rate": 3.907323325822818e-06, - "log_odds_chosen": 5.10004997253418, - "log_odds_ratio": -0.032727014273405075, - "logits/chosen": -2.780730962753296, - "logits/rejected": -2.8234589099884033, - "logps/chosen": -0.14557409286499023, - "logps/rejected": -3.112699031829834, - "loss": 0.1196, - "nll_loss": 0.1244465708732605, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007278704084455967, - "rewards/margins": 0.14835625886917114, - "rewards/rejected": -0.15563495457172394, - "step": 2620 - }, - { - "epoch": 2.758259045621395, - "grad_norm": 2.2549688272537094, - "learning_rate": 3.8998878798351596e-06, - "log_odds_chosen": 5.7140727043151855, - "log_odds_ratio": -0.026816044002771378, - "logits/chosen": -2.864112377166748, - "logits/rejected": -2.8956217765808105, - "logps/chosen": -0.14010892808437347, - "logps/rejected": -3.677777051925659, - "loss": 0.1148, - "nll_loss": 0.11140565574169159, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007005447056144476, - "rewards/margins": 0.17688342928886414, - "rewards/rejected": -0.18388888239860535, - "step": 2630 - }, - { - "epoch": 2.7687467226009437, - "grad_norm": 2.3361581110737384, - "learning_rate": 3.892494720807615e-06, - "log_odds_chosen": 6.5437517166137695, - "log_odds_ratio": -0.02287450060248375, - "logits/chosen": -2.835170269012451, - "logits/rejected": -2.904600143432617, - "logps/chosen": -0.15383225679397583, - "logps/rejected": -4.582453727722168, - "loss": 0.1163, - "nll_loss": 0.1210094466805458, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007691613398492336, - "rewards/margins": 0.22143109142780304, - "rewards/rejected": -0.22912268340587616, - "step": 2640 - }, - { - "epoch": 2.779234399580493, - "grad_norm": 2.113727988806721, - "learning_rate": 3.885143449429057e-06, - "log_odds_chosen": 8.709664344787598, - "log_odds_ratio": -0.01187268365174532, - "logits/chosen": -2.8075308799743652, - "logits/rejected": -2.8737902641296387, - "logps/chosen": -0.15384691953659058, - "logps/rejected": -6.678023338317871, - "loss": 0.1126, - "nll_loss": 0.11222463846206665, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007692346815019846, - "rewards/margins": 0.32620885968208313, - "rewards/rejected": -0.33390119671821594, - "step": 2650 - }, - { - "epoch": 2.789722076560042, - "grad_norm": 2.1767794366513376, - "learning_rate": 3.877833671647406e-06, - "log_odds_chosen": 7.380768775939941, - "log_odds_ratio": -0.028077024966478348, - "logits/chosen": -2.793292999267578, - "logits/rejected": -2.8911733627319336, - "logps/chosen": -0.15328237414360046, - "logps/rejected": -5.426938533782959, - "loss": 0.1168, - "nll_loss": 0.11543625593185425, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.0076641179621219635, - "rewards/margins": 0.26368287205696106, - "rewards/rejected": -0.27134692668914795, - "step": 2660 - }, - { - "epoch": 2.800209753539591, - "grad_norm": 2.256877035979117, - "learning_rate": 3.870564998580918e-06, - "log_odds_chosen": 8.639537811279297, - "log_odds_ratio": -0.022679299116134644, - "logits/chosen": -2.811685085296631, - "logits/rejected": -2.9056103229522705, - "logps/chosen": -0.15335455536842346, - "logps/rejected": -6.6522955894470215, - "loss": 0.1172, - "nll_loss": 0.1345623880624771, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.007667726371437311, - "rewards/margins": 0.3249470591545105, - "rewards/rejected": -0.3326147794723511, - "step": 2670 - }, - { - "epoch": 2.81069743051914, - "grad_norm": 2.0730722454139485, - "learning_rate": 3.863337046431279e-06, - "log_odds_chosen": 6.9750657081604, - "log_odds_ratio": -0.025320613756775856, - "logits/chosen": -2.7947394847869873, - "logits/rejected": -2.846017360687256, - "logps/chosen": -0.13509753346443176, - "logps/rejected": -4.8464508056640625, - "loss": 0.1193, - "nll_loss": 0.10888632386922836, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006754877511411905, - "rewards/margins": 0.23556765913963318, - "rewards/rejected": -0.24232256412506104, - "step": 2680 - }, - { - "epoch": 2.821185107498689, - "grad_norm": 1.9858072033613254, - "learning_rate": 3.8561494363984955e-06, - "log_odds_chosen": 9.771112442016602, - "log_odds_ratio": -0.013731351122260094, - "logits/chosen": -2.8062682151794434, - "logits/rejected": -2.9753849506378174, - "logps/chosen": -0.14906486868858337, - "logps/rejected": -7.731194496154785, - "loss": 0.1179, - "nll_loss": 0.11920718103647232, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007453243248164654, - "rewards/margins": 0.37910646200180054, - "rewards/rejected": -0.38655975461006165, - "step": 2690 - }, - { - "epoch": 2.831672784478238, - "grad_norm": 1.6847580595509726, - "learning_rate": 3.849001794597506e-06, - "log_odds_chosen": 7.8019118309021, - "log_odds_ratio": -0.019792212173342705, - "logits/chosen": -2.8470611572265625, - "logits/rejected": -2.9447550773620605, - "logps/chosen": -0.15314054489135742, - "logps/rejected": -5.769678115844727, - "loss": 0.1192, - "nll_loss": 0.11755287647247314, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007657027803361416, - "rewards/margins": 0.2808268666267395, - "rewards/rejected": -0.2884839177131653, - "step": 2700 - }, - { - "epoch": 2.831672784478238, - "eval_log_odds_chosen": 1.020140528678894, - "eval_log_odds_ratio": -0.950748860836029, - "eval_logits/chosen": -2.866152763366699, - "eval_logits/rejected": -2.883617877960205, - "eval_logps/chosen": -2.3778645992279053, - "eval_logps/rejected": -3.2670860290527344, - "eval_loss": 1.2390626668930054, - "eval_nll_loss": 1.1910258531570435, - "eval_rewards/accuracies": 0.625, - "eval_rewards/chosen": -0.11889322102069855, - "eval_rewards/margins": 0.04446107894182205, - "eval_rewards/rejected": -0.16335429251194, - "eval_runtime": 137.1045, - "eval_samples_per_second": 14.544, - "eval_steps_per_second": 0.46, - "step": 2700 - }, - { - "epoch": 2.8421604614577873, - "grad_norm": 2.227062658222717, - "learning_rate": 3.841893751976493e-06, - "log_odds_chosen": 6.429055690765381, - "log_odds_ratio": -0.025566572323441505, - "logits/chosen": -2.8230857849121094, - "logits/rejected": -2.9232447147369385, - "logps/chosen": -0.13817086815834045, - "logps/rejected": -4.313010215759277, - "loss": 0.1236, - "nll_loss": 0.1359073519706726, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.006908542010933161, - "rewards/margins": 0.20874197781085968, - "rewards/rejected": -0.2156505137681961, - "step": 2710 - }, - { - "epoch": 2.852648138437336, - "grad_norm": 2.108179677461151, - "learning_rate": 3.834824944236852e-06, - "log_odds_chosen": 7.687928676605225, - "log_odds_ratio": -0.019871855154633522, - "logits/chosen": -2.9058802127838135, - "logits/rejected": -3.016103744506836, - "logps/chosen": -0.15432411432266235, - "logps/rejected": -5.692026615142822, - "loss": 0.1226, - "nll_loss": 0.12474212795495987, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.0077162072993814945, - "rewards/margins": 0.27688512206077576, - "rewards/rejected": -0.2846013009548187, - "step": 2720 - }, - { - "epoch": 2.863135815416885, - "grad_norm": 2.0852362976431627, - "learning_rate": 3.827795011754764e-06, - "log_odds_chosen": 7.531012058258057, - "log_odds_ratio": -0.020183496177196503, - "logits/chosen": -2.9127936363220215, - "logits/rejected": -3.042579174041748, - "logps/chosen": -0.1713821142911911, - "logps/rejected": -5.637821197509766, - "loss": 0.1192, - "nll_loss": 0.1238013282418251, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.00856910552829504, - "rewards/margins": 0.2733219265937805, - "rewards/rejected": -0.2818910479545593, - "step": 2730 - }, - { - "epoch": 2.873623492396434, - "grad_norm": 2.1240217329220727, - "learning_rate": 3.8208035995043505e-06, - "log_odds_chosen": 7.918447017669678, - "log_odds_ratio": -0.016450051218271255, - "logits/chosen": -2.9222500324249268, - "logits/rejected": -3.0099682807922363, - "logps/chosen": -0.16613063216209412, - "logps/rejected": -5.923202037811279, - "loss": 0.1167, - "nll_loss": 0.11456701904535294, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00830653216689825, - "rewards/margins": 0.28785353899002075, - "rewards/rejected": -0.2961600720882416, - "step": 2740 - }, - { - "epoch": 2.8841111693759833, - "grad_norm": 31.79228564478535, - "learning_rate": 3.8138503569823697e-06, - "log_odds_chosen": 6.909941673278809, - "log_odds_ratio": -0.009971695020794868, - "logits/chosen": -2.913257598876953, - "logits/rejected": -3.0123419761657715, - "logps/chosen": -0.14221827685832977, - "logps/rejected": -4.7533063888549805, - "loss": 0.1366, - "nll_loss": 0.12416551262140274, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007110914681106806, - "rewards/margins": 0.23055438697338104, - "rewards/rejected": -0.2376653254032135, - "step": 2750 - }, - { - "epoch": 2.894598846355532, - "grad_norm": 1.9557051281290665, - "learning_rate": 3.806934938134405e-06, - "log_odds_chosen": 6.693169593811035, - "log_odds_ratio": -0.02671411633491516, - "logits/chosen": -2.8386614322662354, - "logits/rejected": -2.913949966430664, - "logps/chosen": -0.158113032579422, - "logps/rejected": -4.6884589195251465, - "loss": 0.1257, - "nll_loss": 0.13248762488365173, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.007905651815235615, - "rewards/margins": 0.22651728987693787, - "rewards/rejected": -0.23442292213439941, - "step": 2760 - }, - { - "epoch": 2.9050865233350813, - "grad_norm": 2.137070948069414, - "learning_rate": 3.800057001282532e-06, - "log_odds_chosen": 7.526410102844238, - "log_odds_ratio": -0.018288953229784966, - "logits/chosen": -2.8420822620391846, - "logits/rejected": -2.9359934329986572, - "logps/chosen": -0.13937655091285706, - "logps/rejected": -5.3555192947387695, - "loss": 0.1203, - "nll_loss": 0.11602024734020233, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.00696882838383317, - "rewards/margins": 0.2608071565628052, - "rewards/rejected": -0.2677759826183319, - "step": 2770 - }, - { - "epoch": 2.91557420031463, - "grad_norm": 1.9039164114563458, - "learning_rate": 3.7932162090544085e-06, - "log_odds_chosen": 8.005070686340332, - "log_odds_ratio": -0.013831285759806633, - "logits/chosen": -2.85080885887146, - "logits/rejected": -2.9412410259246826, - "logps/chosen": -0.14242660999298096, - "logps/rejected": -5.835131645202637, - "loss": 0.115, - "nll_loss": 0.11129038035869598, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007121330592781305, - "rewards/margins": 0.2846352159976959, - "rewards/rejected": -0.2917565703392029, - "step": 2780 - }, - { - "epoch": 2.9260618772941793, - "grad_norm": 1.9066238493747631, - "learning_rate": 3.7864122283137657e-06, - "log_odds_chosen": 8.59681510925293, - "log_odds_ratio": -0.01634146459400654, - "logits/chosen": -2.811566114425659, - "logits/rejected": -2.953697681427002, - "logps/chosen": -0.1852981150150299, - "logps/rejected": -6.696959495544434, - "loss": 0.1237, - "nll_loss": 0.13221383094787598, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009264904074370861, - "rewards/margins": 0.3255830705165863, - "rewards/rejected": -0.33484798669815063, - "step": 2790 - }, - { - "epoch": 2.9365495542737285, - "grad_norm": 2.1229204349942523, - "learning_rate": 3.7796447300922724e-06, - "log_odds_chosen": 8.886019706726074, - "log_odds_ratio": -0.014133910648524761, - "logits/chosen": -2.8244338035583496, - "logits/rejected": -2.9361133575439453, - "logps/chosen": -0.1553722470998764, - "logps/rejected": -6.724435329437256, - "loss": 0.1191, - "nll_loss": 0.11856858432292938, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007768611423671246, - "rewards/margins": 0.3284532129764557, - "rewards/rejected": -0.3362218141555786, - "step": 2800 - }, - { - "epoch": 2.9365495542737285, - "eval_log_odds_chosen": 0.9868643283843994, - "eval_log_odds_ratio": -0.8558183312416077, - "eval_logits/chosen": -2.8059191703796387, - "eval_logits/rejected": -2.8221092224121094, - "eval_logps/chosen": -1.9523440599441528, - "eval_logps/rejected": -2.7882232666015625, - "eval_loss": 1.0213509798049927, - "eval_nll_loss": 0.9673047065734863, - "eval_rewards/accuracies": 0.6269841194152832, - "eval_rewards/chosen": -0.09761719405651093, - "eval_rewards/margins": 0.04179396852850914, - "eval_rewards/rejected": -0.13941116631031036, - "eval_runtime": 140.3646, - "eval_samples_per_second": 14.206, - "eval_steps_per_second": 0.449, - "step": 2800 - }, - { - "epoch": 2.9470372312532773, - "grad_norm": 1.8098718147037927, - "learning_rate": 3.772913389522725e-06, - "log_odds_chosen": 7.045705318450928, - "log_odds_ratio": -0.0264790840446949, - "logits/chosen": -2.8278496265411377, - "logits/rejected": -2.935941696166992, - "logps/chosen": -0.16044145822525024, - "logps/rejected": -5.10351037979126, - "loss": 0.1197, - "nll_loss": 0.11624834686517715, - "rewards/accuracies": 0.987500011920929, - "rewards/chosen": -0.008022072724997997, - "rewards/margins": 0.24715343117713928, - "rewards/rejected": -0.25517550110816956, - "step": 2810 - }, - { - "epoch": 2.9575249082328265, - "grad_norm": 1.8754542855362524, - "learning_rate": 3.7662178857735478e-06, - "log_odds_chosen": 8.025814056396484, - "log_odds_ratio": -0.014746090397238731, - "logits/chosen": -2.7981061935424805, - "logits/rejected": -2.9223358631134033, - "logps/chosen": -0.1609780192375183, - "logps/rejected": -6.0790114402771, - "loss": 0.1164, - "nll_loss": 0.114871546626091, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00804890040308237, - "rewards/margins": 0.29590168595314026, - "rewards/rejected": -0.30395060777664185, - "step": 2820 - }, - { - "epoch": 2.9680125852123753, - "grad_norm": 2.270114335100112, - "learning_rate": 3.7595579019845623e-06, - "log_odds_chosen": 7.872386932373047, - "log_odds_ratio": -0.01882219687104225, - "logits/chosen": -2.8168020248413086, - "logits/rejected": -2.900966167449951, - "logps/chosen": -0.1528329849243164, - "logps/rejected": -5.721396446228027, - "loss": 0.117, - "nll_loss": 0.1145024448633194, - "rewards/accuracies": 0.9937499761581421, - "rewards/chosen": -0.007641649339348078, - "rewards/margins": 0.27842822670936584, - "rewards/rejected": -0.2860698103904724, - "step": 2830 - }, - { - "epoch": 2.9785002621919245, - "grad_norm": 2.2955550853318907, - "learning_rate": 3.752933125204008e-06, - "log_odds_chosen": 8.305427551269531, - "log_odds_ratio": -0.02256721630692482, - "logits/chosen": -2.8052284717559814, - "logits/rejected": -2.9265544414520264, - "logps/chosen": -0.13989822566509247, - "logps/rejected": -6.217524528503418, - "loss": 0.1182, - "nll_loss": 0.12114028632640839, - "rewards/accuracies": 0.981249988079071, - "rewards/chosen": -0.006994911935180426, - "rewards/margins": 0.30388128757476807, - "rewards/rejected": -0.31087619066238403, - "step": 2840 - }, - { - "epoch": 2.9889879391714738, - "grad_norm": 1.888221991554896, - "learning_rate": 3.7463432463267764e-06, - "log_odds_chosen": 7.020120143890381, - "log_odds_ratio": -0.01538365613669157, - "logits/chosen": -2.8246865272521973, - "logits/rejected": -2.9202027320861816, - "logps/chosen": -0.16290083527565002, - "logps/rejected": -4.992356777191162, - "loss": 0.1252, - "nll_loss": 0.14337727427482605, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008145040832459927, - "rewards/margins": 0.24147279560565948, - "rewards/rejected": -0.24961784482002258, - "step": 2850 - }, - { - "epoch": 2.9984268484530676, - "step": 2859, + "epoch": 0.9994756161510225, + "step": 953, "total_flos": 0.0, - "train_loss": 0.32389816019492534, - "train_runtime": 62235.4926, - "train_samples_per_second": 2.941, + "train_loss": 0.5301580581685054, + "train_runtime": 20737.8205, + "train_samples_per_second": 2.942, "train_steps_per_second": 0.046 } ], "logging_steps": 10, - "max_steps": 2859, + "max_steps": 953, "num_input_tokens_seen": 0, - "num_train_epochs": 3, + "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": {