|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 306, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 16.160893330879862, |
|
"learning_rate": 1.6129032258064514e-08, |
|
"logits/chosen": -1.6982225179672241, |
|
"logits/rejected": -1.086500644683838, |
|
"logps/chosen": -208.50250244140625, |
|
"logps/rejected": -262.22808837890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 18.69135932886243, |
|
"learning_rate": 1.6129032258064515e-07, |
|
"logits/chosen": -1.4697270393371582, |
|
"logits/rejected": -0.9334302544593811, |
|
"logps/chosen": -234.84239196777344, |
|
"logps/rejected": -317.2652893066406, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.000248217984335497, |
|
"rewards/margins": 0.00017482459952589124, |
|
"rewards/rejected": -0.00042304262751713395, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 16.031117031534944, |
|
"learning_rate": 3.225806451612903e-07, |
|
"logits/chosen": -1.3034999370574951, |
|
"logits/rejected": -1.0336174964904785, |
|
"logps/chosen": -224.30307006835938, |
|
"logps/rejected": -348.02825927734375, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.0018210510024800897, |
|
"rewards/margins": 0.0035554722417145967, |
|
"rewards/rejected": -0.0017344218213111162, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 16.925592105056882, |
|
"learning_rate": 4.838709677419355e-07, |
|
"logits/chosen": -1.0461599826812744, |
|
"logits/rejected": -0.9585116505622864, |
|
"logps/chosen": -237.54373168945312, |
|
"logps/rejected": -275.940673828125, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.01179418247193098, |
|
"rewards/margins": 0.023266470059752464, |
|
"rewards/rejected": -0.011472286656498909, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 17.790603933181593, |
|
"learning_rate": 4.986797785768295e-07, |
|
"logits/chosen": -1.2466868162155151, |
|
"logits/rejected": -1.0099724531173706, |
|
"logps/chosen": -226.1925506591797, |
|
"logps/rejected": -308.3481750488281, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.006962643004953861, |
|
"rewards/margins": 0.06420420855283737, |
|
"rewards/rejected": -0.057241566479206085, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 18.832438856630475, |
|
"learning_rate": 4.941339491514909e-07, |
|
"logits/chosen": -1.0786056518554688, |
|
"logits/rejected": -0.8200371861457825, |
|
"logps/chosen": -250.77627563476562, |
|
"logps/rejected": -305.92352294921875, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03331167623400688, |
|
"rewards/margins": 0.29684731364250183, |
|
"rewards/rejected": -0.3301590085029602, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 26.859354757154694, |
|
"learning_rate": 4.864054603442063e-07, |
|
"logits/chosen": -0.8682538270950317, |
|
"logits/rejected": -0.8277397155761719, |
|
"logps/chosen": -281.60577392578125, |
|
"logps/rejected": -436.7176208496094, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1347092092037201, |
|
"rewards/margins": 0.7372199296951294, |
|
"rewards/rejected": -0.8719291687011719, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 19.728868151669563, |
|
"learning_rate": 4.755950648257788e-07, |
|
"logits/chosen": -0.9148917198181152, |
|
"logits/rejected": -0.5669609308242798, |
|
"logps/chosen": -303.3214416503906, |
|
"logps/rejected": -471.29345703125, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24109110236167908, |
|
"rewards/margins": 1.1684527397155762, |
|
"rewards/rejected": -1.4095438718795776, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 17.67886600787927, |
|
"learning_rate": 4.618436926341606e-07, |
|
"logits/chosen": -0.6920875310897827, |
|
"logits/rejected": -0.1917627602815628, |
|
"logps/chosen": -238.93624877929688, |
|
"logps/rejected": -427.3094177246094, |
|
"loss": 0.4322, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.18579021096229553, |
|
"rewards/margins": 1.348921298980713, |
|
"rewards/rejected": -1.5347115993499756, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 20.470351398393845, |
|
"learning_rate": 4.4533061393588276e-07, |
|
"logits/chosen": -0.9842801094055176, |
|
"logits/rejected": -0.17733868956565857, |
|
"logps/chosen": -269.7094421386719, |
|
"logps/rejected": -537.8287963867188, |
|
"loss": 0.4163, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.2889798879623413, |
|
"rewards/margins": 1.93475341796875, |
|
"rewards/rejected": -2.223733425140381, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 22.11813905754231, |
|
"learning_rate": 4.262711019652764e-07, |
|
"logits/chosen": -0.5012297630310059, |
|
"logits/rejected": -9.913742542266846e-05, |
|
"logps/chosen": -328.4926452636719, |
|
"logps/rejected": -584.0737915039062, |
|
"loss": 0.3672, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.3633125424385071, |
|
"rewards/margins": 2.0779290199279785, |
|
"rewards/rejected": -2.4412412643432617, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_logits/chosen": -1.7892314195632935, |
|
"eval_logits/rejected": 0.012374745681881905, |
|
"eval_logps/chosen": -325.3137512207031, |
|
"eval_logps/rejected": -259.1305847167969, |
|
"eval_loss": 0.5652258396148682, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.27210739254951477, |
|
"eval_rewards/margins": 0.3719515800476074, |
|
"eval_rewards/rejected": -0.6440589427947998, |
|
"eval_runtime": 62.3299, |
|
"eval_samples_per_second": 9.113, |
|
"eval_steps_per_second": 0.289, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 27.459187382986496, |
|
"learning_rate": 4.0491362660864523e-07, |
|
"logits/chosen": -0.4417840838432312, |
|
"logits/rejected": 0.22044658660888672, |
|
"logps/chosen": -216.5067138671875, |
|
"logps/rejected": -570.1358032226562, |
|
"loss": 0.3445, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2089937925338745, |
|
"rewards/margins": 2.619229555130005, |
|
"rewards/rejected": -2.82822322845459, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 22.673980281488344, |
|
"learning_rate": 3.8153661521931215e-07, |
|
"logits/chosen": -0.25229763984680176, |
|
"logits/rejected": 0.42893147468566895, |
|
"logps/chosen": -287.91815185546875, |
|
"logps/rejected": -586.4707641601562, |
|
"loss": 0.3152, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.3577408790588379, |
|
"rewards/margins": 2.5164005756378174, |
|
"rewards/rejected": -2.8741414546966553, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 22.548222714710292, |
|
"learning_rate": 3.5644482289126813e-07, |
|
"logits/chosen": -0.36232301592826843, |
|
"logits/rejected": 0.7560933828353882, |
|
"logps/chosen": -271.9062805175781, |
|
"logps/rejected": -581.7931518554688, |
|
"loss": 0.3195, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.2464122772216797, |
|
"rewards/margins": 2.863015651702881, |
|
"rewards/rejected": -3.1094279289245605, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 23.598569359547252, |
|
"learning_rate": 3.299653595104602e-07, |
|
"logits/chosen": 0.2891393005847931, |
|
"logits/rejected": 1.1849420070648193, |
|
"logps/chosen": -278.166748046875, |
|
"logps/rejected": -591.746337890625, |
|
"loss": 0.2842, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.2587340772151947, |
|
"rewards/margins": 2.8643851280212402, |
|
"rewards/rejected": -3.123119354248047, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 21.260621719260254, |
|
"learning_rate": 3.024434253771773e-07, |
|
"logits/chosen": -0.052896756678819656, |
|
"logits/rejected": 1.054487943649292, |
|
"logps/chosen": -255.85391235351562, |
|
"logps/rejected": -814.2733154296875, |
|
"loss": 0.2754, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.3424001932144165, |
|
"rewards/margins": 4.599584579467773, |
|
"rewards/rejected": -4.9419846534729, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 24.166711565305622, |
|
"learning_rate": 2.7423781099222037e-07, |
|
"logits/chosen": -0.3343699276447296, |
|
"logits/rejected": 1.2091766595840454, |
|
"logps/chosen": -241.1891326904297, |
|
"logps/rejected": -667.7901000976562, |
|
"loss": 0.2519, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1985635608434677, |
|
"rewards/margins": 3.7136471271514893, |
|
"rewards/rejected": -3.912210464477539, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 47.56416189863626, |
|
"learning_rate": 2.4571621967402515e-07, |
|
"logits/chosen": -0.14796659350395203, |
|
"logits/rejected": 0.9326593279838562, |
|
"logps/chosen": -231.9830322265625, |
|
"logps/rejected": -778.2721557617188, |
|
"loss": 0.2593, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.2619406580924988, |
|
"rewards/margins": 4.371499538421631, |
|
"rewards/rejected": -4.633440017700195, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 23.682645133331448, |
|
"learning_rate": 2.1725047398357676e-07, |
|
"logits/chosen": 0.07808978855609894, |
|
"logits/rejected": 0.8551836013793945, |
|
"logps/chosen": -299.97503662109375, |
|
"logps/rejected": -720.2726440429688, |
|
"loss": 0.2309, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.3632424473762512, |
|
"rewards/margins": 3.9580280780792236, |
|
"rewards/rejected": -4.321269989013672, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 24.593709800613684, |
|
"learning_rate": 1.892116684486976e-07, |
|
"logits/chosen": -0.4507158398628235, |
|
"logits/rejected": 1.0840203762054443, |
|
"logps/chosen": -231.7598114013672, |
|
"logps/rejected": -695.33837890625, |
|
"loss": 0.2105, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.3032680153846741, |
|
"rewards/margins": 4.107699871063232, |
|
"rewards/rejected": -4.410967826843262, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 26.182421304771378, |
|
"learning_rate": 1.619653317793613e-07, |
|
"logits/chosen": -0.4781159460544586, |
|
"logits/rejected": 1.3689903020858765, |
|
"logps/chosen": -250.3883819580078, |
|
"logps/rejected": -800.0301513671875, |
|
"loss": 0.2495, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.2692957818508148, |
|
"rewards/margins": 4.869724750518799, |
|
"rewards/rejected": -5.139020919799805, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -1.4423691034317017, |
|
"eval_logits/rejected": 0.8730748295783997, |
|
"eval_logps/chosen": -366.8614196777344, |
|
"eval_logps/rejected": -347.8152160644531, |
|
"eval_loss": 0.5144294500350952, |
|
"eval_rewards/accuracies": 0.7152777910232544, |
|
"eval_rewards/chosen": -0.6875841617584229, |
|
"eval_rewards/margins": 0.8433213233947754, |
|
"eval_rewards/rejected": -1.5309053659439087, |
|
"eval_runtime": 61.5529, |
|
"eval_samples_per_second": 9.228, |
|
"eval_steps_per_second": 0.292, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 26.71444019699555, |
|
"learning_rate": 1.3586666164195438e-07, |
|
"logits/chosen": -0.11332446336746216, |
|
"logits/rejected": 1.1528918743133545, |
|
"logps/chosen": -242.18594360351562, |
|
"logps/rejected": -730.3687744140625, |
|
"loss": 0.2355, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.17774315178394318, |
|
"rewards/margins": 4.097687721252441, |
|
"rewards/rejected": -4.275431156158447, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 18.78114156072228, |
|
"learning_rate": 1.1125589411448994e-07, |
|
"logits/chosen": -0.16574744880199432, |
|
"logits/rejected": 0.8906081914901733, |
|
"logps/chosen": -257.7942810058594, |
|
"logps/rejected": -799.7039794921875, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.10207457840442657, |
|
"rewards/margins": 4.533341407775879, |
|
"rewards/rejected": -4.635416507720947, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 25.01487938388404, |
|
"learning_rate": 8.845386818900646e-08, |
|
"logits/chosen": -0.45102643966674805, |
|
"logits/rejected": 0.8137510418891907, |
|
"logps/chosen": -249.04348754882812, |
|
"logps/rejected": -713.5521240234375, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.15099892020225525, |
|
"rewards/margins": 4.111520290374756, |
|
"rewards/rejected": -4.262519359588623, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 30.245012228652286, |
|
"learning_rate": 6.775784314464716e-08, |
|
"logits/chosen": -0.3785732388496399, |
|
"logits/rejected": 1.2067172527313232, |
|
"logps/chosen": -247.9203338623047, |
|
"logps/rejected": -789.9756469726562, |
|
"loss": 0.2121, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.31377214193344116, |
|
"rewards/margins": 4.594644546508789, |
|
"rewards/rejected": -4.908417701721191, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 16.348157142642147, |
|
"learning_rate": 4.943762331835621e-08, |
|
"logits/chosen": -0.47478023171424866, |
|
"logits/rejected": 1.148115873336792, |
|
"logps/chosen": -280.11419677734375, |
|
"logps/rejected": -811.0616455078125, |
|
"loss": 0.1801, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2764636278152466, |
|
"rewards/margins": 4.763599395751953, |
|
"rewards/rejected": -5.040062427520752, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 35.57913356579002, |
|
"learning_rate": 3.373204079273473e-08, |
|
"logits/chosen": -0.3408397138118744, |
|
"logits/rejected": 1.2824140787124634, |
|
"logps/chosen": -244.1248321533203, |
|
"logps/rejected": -754.9166259765625, |
|
"loss": 0.2096, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.23486094176769257, |
|
"rewards/margins": 4.0956854820251465, |
|
"rewards/rejected": -4.3305463790893555, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 22.72907844280726, |
|
"learning_rate": 2.084584185459709e-08, |
|
"logits/chosen": -0.27476102113723755, |
|
"logits/rejected": 1.213181495666504, |
|
"logps/chosen": -285.0547790527344, |
|
"logps/rejected": -713.6915283203125, |
|
"loss": 0.1874, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.21095514297485352, |
|
"rewards/margins": 4.196396827697754, |
|
"rewards/rejected": -4.407351493835449, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 22.905550437426676, |
|
"learning_rate": 1.0947017814003257e-08, |
|
"logits/chosen": 0.07453560829162598, |
|
"logits/rejected": 1.153464913368225, |
|
"logps/chosen": -254.3527374267578, |
|
"logps/rejected": -810.6804809570312, |
|
"loss": 0.1795, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2759082615375519, |
|
"rewards/margins": 4.727480411529541, |
|
"rewards/rejected": -5.003388404846191, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 27.524134609799987, |
|
"learning_rate": 4.164614980622677e-09, |
|
"logits/chosen": -0.3413197100162506, |
|
"logits/rejected": 1.2102278470993042, |
|
"logps/chosen": -236.2258758544922, |
|
"logps/rejected": -899.1121215820312, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.2925792336463928, |
|
"rewards/margins": 5.496912956237793, |
|
"rewards/rejected": -5.789492130279541, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 20.178235335586955, |
|
"learning_rate": 5.870523477368439e-10, |
|
"logits/chosen": 0.39968985319137573, |
|
"logits/rejected": 0.5744360089302063, |
|
"logps/chosen": -251.2977752685547, |
|
"logps/rejected": -803.5077514648438, |
|
"loss": 0.1708, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.24827781319618225, |
|
"rewards/margins": 4.899833679199219, |
|
"rewards/rejected": -5.148111343383789, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_logits/chosen": -1.3915222883224487, |
|
"eval_logits/rejected": 1.0055533647537231, |
|
"eval_logps/chosen": -385.5208435058594, |
|
"eval_logps/rejected": -374.23699951171875, |
|
"eval_loss": 0.534546434879303, |
|
"eval_rewards/accuracies": 0.6527777910232544, |
|
"eval_rewards/chosen": -0.8741780519485474, |
|
"eval_rewards/margins": 0.9209451675415039, |
|
"eval_rewards/rejected": -1.7951232194900513, |
|
"eval_runtime": 60.8637, |
|
"eval_samples_per_second": 9.332, |
|
"eval_steps_per_second": 0.296, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 306, |
|
"total_flos": 0.0, |
|
"train_loss": 0.004050073670405967, |
|
"train_runtime": 90.6565, |
|
"train_samples_per_second": 215.892, |
|
"train_steps_per_second": 3.375 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 306, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|