Minbyul's picture
Model save
c3d6868 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 306,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 16.160893330879862,
"learning_rate": 1.6129032258064514e-08,
"logits/chosen": -1.6982225179672241,
"logits/rejected": -1.086500644683838,
"logps/chosen": -208.50250244140625,
"logps/rejected": -262.22808837890625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 18.69135932886243,
"learning_rate": 1.6129032258064515e-07,
"logits/chosen": -1.4697270393371582,
"logits/rejected": -0.9334302544593811,
"logps/chosen": -234.84239196777344,
"logps/rejected": -317.2652893066406,
"loss": 0.6929,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.000248217984335497,
"rewards/margins": 0.00017482459952589124,
"rewards/rejected": -0.00042304262751713395,
"step": 10
},
{
"epoch": 0.07,
"grad_norm": 16.031117031534944,
"learning_rate": 3.225806451612903e-07,
"logits/chosen": -1.3034999370574951,
"logits/rejected": -1.0336174964904785,
"logps/chosen": -224.30307006835938,
"logps/rejected": -348.02825927734375,
"loss": 0.6916,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.0018210510024800897,
"rewards/margins": 0.0035554722417145967,
"rewards/rejected": -0.0017344218213111162,
"step": 20
},
{
"epoch": 0.1,
"grad_norm": 16.925592105056882,
"learning_rate": 4.838709677419355e-07,
"logits/chosen": -1.0461599826812744,
"logits/rejected": -0.9585116505622864,
"logps/chosen": -237.54373168945312,
"logps/rejected": -275.940673828125,
"loss": 0.6822,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": 0.01179418247193098,
"rewards/margins": 0.023266470059752464,
"rewards/rejected": -0.011472286656498909,
"step": 30
},
{
"epoch": 0.13,
"grad_norm": 17.790603933181593,
"learning_rate": 4.986797785768295e-07,
"logits/chosen": -1.2466868162155151,
"logits/rejected": -1.0099724531173706,
"logps/chosen": -226.1925506591797,
"logps/rejected": -308.3481750488281,
"loss": 0.6584,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.006962643004953861,
"rewards/margins": 0.06420420855283737,
"rewards/rejected": -0.057241566479206085,
"step": 40
},
{
"epoch": 0.16,
"grad_norm": 18.832438856630475,
"learning_rate": 4.941339491514909e-07,
"logits/chosen": -1.0786056518554688,
"logits/rejected": -0.8200371861457825,
"logps/chosen": -250.77627563476562,
"logps/rejected": -305.92352294921875,
"loss": 0.5989,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03331167623400688,
"rewards/margins": 0.29684731364250183,
"rewards/rejected": -0.3301590085029602,
"step": 50
},
{
"epoch": 0.2,
"grad_norm": 26.859354757154694,
"learning_rate": 4.864054603442063e-07,
"logits/chosen": -0.8682538270950317,
"logits/rejected": -0.8277397155761719,
"logps/chosen": -281.60577392578125,
"logps/rejected": -436.7176208496094,
"loss": 0.5358,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1347092092037201,
"rewards/margins": 0.7372199296951294,
"rewards/rejected": -0.8719291687011719,
"step": 60
},
{
"epoch": 0.23,
"grad_norm": 19.728868151669563,
"learning_rate": 4.755950648257788e-07,
"logits/chosen": -0.9148917198181152,
"logits/rejected": -0.5669609308242798,
"logps/chosen": -303.3214416503906,
"logps/rejected": -471.29345703125,
"loss": 0.4898,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.24109110236167908,
"rewards/margins": 1.1684527397155762,
"rewards/rejected": -1.4095438718795776,
"step": 70
},
{
"epoch": 0.26,
"grad_norm": 17.67886600787927,
"learning_rate": 4.618436926341606e-07,
"logits/chosen": -0.6920875310897827,
"logits/rejected": -0.1917627602815628,
"logps/chosen": -238.93624877929688,
"logps/rejected": -427.3094177246094,
"loss": 0.4322,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.18579021096229553,
"rewards/margins": 1.348921298980713,
"rewards/rejected": -1.5347115993499756,
"step": 80
},
{
"epoch": 0.29,
"grad_norm": 20.470351398393845,
"learning_rate": 4.4533061393588276e-07,
"logits/chosen": -0.9842801094055176,
"logits/rejected": -0.17733868956565857,
"logps/chosen": -269.7094421386719,
"logps/rejected": -537.8287963867188,
"loss": 0.4163,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.2889798879623413,
"rewards/margins": 1.93475341796875,
"rewards/rejected": -2.223733425140381,
"step": 90
},
{
"epoch": 0.33,
"grad_norm": 22.11813905754231,
"learning_rate": 4.262711019652764e-07,
"logits/chosen": -0.5012297630310059,
"logits/rejected": -9.913742542266846e-05,
"logps/chosen": -328.4926452636719,
"logps/rejected": -584.0737915039062,
"loss": 0.3672,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.3633125424385071,
"rewards/margins": 2.0779290199279785,
"rewards/rejected": -2.4412412643432617,
"step": 100
},
{
"epoch": 0.33,
"eval_logits/chosen": -1.7892314195632935,
"eval_logits/rejected": 0.012374745681881905,
"eval_logps/chosen": -325.3137512207031,
"eval_logps/rejected": -259.1305847167969,
"eval_loss": 0.5652258396148682,
"eval_rewards/accuracies": 0.75,
"eval_rewards/chosen": -0.27210739254951477,
"eval_rewards/margins": 0.3719515800476074,
"eval_rewards/rejected": -0.6440589427947998,
"eval_runtime": 62.3299,
"eval_samples_per_second": 9.113,
"eval_steps_per_second": 0.289,
"step": 100
},
{
"epoch": 0.36,
"grad_norm": 27.459187382986496,
"learning_rate": 4.0491362660864523e-07,
"logits/chosen": -0.4417840838432312,
"logits/rejected": 0.22044658660888672,
"logps/chosen": -216.5067138671875,
"logps/rejected": -570.1358032226562,
"loss": 0.3445,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.2089937925338745,
"rewards/margins": 2.619229555130005,
"rewards/rejected": -2.82822322845459,
"step": 110
},
{
"epoch": 0.39,
"grad_norm": 22.673980281488344,
"learning_rate": 3.8153661521931215e-07,
"logits/chosen": -0.25229763984680176,
"logits/rejected": 0.42893147468566895,
"logps/chosen": -287.91815185546875,
"logps/rejected": -586.4707641601562,
"loss": 0.3152,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.3577408790588379,
"rewards/margins": 2.5164005756378174,
"rewards/rejected": -2.8741414546966553,
"step": 120
},
{
"epoch": 0.42,
"grad_norm": 22.548222714710292,
"learning_rate": 3.5644482289126813e-07,
"logits/chosen": -0.36232301592826843,
"logits/rejected": 0.7560933828353882,
"logps/chosen": -271.9062805175781,
"logps/rejected": -581.7931518554688,
"loss": 0.3195,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.2464122772216797,
"rewards/margins": 2.863015651702881,
"rewards/rejected": -3.1094279289245605,
"step": 130
},
{
"epoch": 0.46,
"grad_norm": 23.598569359547252,
"learning_rate": 3.299653595104602e-07,
"logits/chosen": 0.2891393005847931,
"logits/rejected": 1.1849420070648193,
"logps/chosen": -278.166748046875,
"logps/rejected": -591.746337890625,
"loss": 0.2842,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.2587340772151947,
"rewards/margins": 2.8643851280212402,
"rewards/rejected": -3.123119354248047,
"step": 140
},
{
"epoch": 0.49,
"grad_norm": 21.260621719260254,
"learning_rate": 3.024434253771773e-07,
"logits/chosen": -0.052896756678819656,
"logits/rejected": 1.054487943649292,
"logps/chosen": -255.85391235351562,
"logps/rejected": -814.2733154296875,
"loss": 0.2754,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.3424001932144165,
"rewards/margins": 4.599584579467773,
"rewards/rejected": -4.9419846534729,
"step": 150
},
{
"epoch": 0.52,
"grad_norm": 24.166711565305622,
"learning_rate": 2.7423781099222037e-07,
"logits/chosen": -0.3343699276447296,
"logits/rejected": 1.2091766595840454,
"logps/chosen": -241.1891326904297,
"logps/rejected": -667.7901000976562,
"loss": 0.2519,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.1985635608434677,
"rewards/margins": 3.7136471271514893,
"rewards/rejected": -3.912210464477539,
"step": 160
},
{
"epoch": 0.56,
"grad_norm": 47.56416189863626,
"learning_rate": 2.4571621967402515e-07,
"logits/chosen": -0.14796659350395203,
"logits/rejected": 0.9326593279838562,
"logps/chosen": -231.9830322265625,
"logps/rejected": -778.2721557617188,
"loss": 0.2593,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.2619406580924988,
"rewards/margins": 4.371499538421631,
"rewards/rejected": -4.633440017700195,
"step": 170
},
{
"epoch": 0.59,
"grad_norm": 23.682645133331448,
"learning_rate": 2.1725047398357676e-07,
"logits/chosen": 0.07808978855609894,
"logits/rejected": 0.8551836013793945,
"logps/chosen": -299.97503662109375,
"logps/rejected": -720.2726440429688,
"loss": 0.2309,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.3632424473762512,
"rewards/margins": 3.9580280780792236,
"rewards/rejected": -4.321269989013672,
"step": 180
},
{
"epoch": 0.62,
"grad_norm": 24.593709800613684,
"learning_rate": 1.892116684486976e-07,
"logits/chosen": -0.4507158398628235,
"logits/rejected": 1.0840203762054443,
"logps/chosen": -231.7598114013672,
"logps/rejected": -695.33837890625,
"loss": 0.2105,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.3032680153846741,
"rewards/margins": 4.107699871063232,
"rewards/rejected": -4.410967826843262,
"step": 190
},
{
"epoch": 0.65,
"grad_norm": 26.182421304771378,
"learning_rate": 1.619653317793613e-07,
"logits/chosen": -0.4781159460544586,
"logits/rejected": 1.3689903020858765,
"logps/chosen": -250.3883819580078,
"logps/rejected": -800.0301513671875,
"loss": 0.2495,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.2692957818508148,
"rewards/margins": 4.869724750518799,
"rewards/rejected": -5.139020919799805,
"step": 200
},
{
"epoch": 0.65,
"eval_logits/chosen": -1.4423691034317017,
"eval_logits/rejected": 0.8730748295783997,
"eval_logps/chosen": -366.8614196777344,
"eval_logps/rejected": -347.8152160644531,
"eval_loss": 0.5144294500350952,
"eval_rewards/accuracies": 0.7152777910232544,
"eval_rewards/chosen": -0.6875841617584229,
"eval_rewards/margins": 0.8433213233947754,
"eval_rewards/rejected": -1.5309053659439087,
"eval_runtime": 61.5529,
"eval_samples_per_second": 9.228,
"eval_steps_per_second": 0.292,
"step": 200
},
{
"epoch": 0.69,
"grad_norm": 26.71444019699555,
"learning_rate": 1.3586666164195438e-07,
"logits/chosen": -0.11332446336746216,
"logits/rejected": 1.1528918743133545,
"logps/chosen": -242.18594360351562,
"logps/rejected": -730.3687744140625,
"loss": 0.2355,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.17774315178394318,
"rewards/margins": 4.097687721252441,
"rewards/rejected": -4.275431156158447,
"step": 210
},
{
"epoch": 0.72,
"grad_norm": 18.78114156072228,
"learning_rate": 1.1125589411448994e-07,
"logits/chosen": -0.16574744880199432,
"logits/rejected": 0.8906081914901733,
"logps/chosen": -257.7942810058594,
"logps/rejected": -799.7039794921875,
"loss": 0.2147,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.10207457840442657,
"rewards/margins": 4.533341407775879,
"rewards/rejected": -4.635416507720947,
"step": 220
},
{
"epoch": 0.75,
"grad_norm": 25.01487938388404,
"learning_rate": 8.845386818900646e-08,
"logits/chosen": -0.45102643966674805,
"logits/rejected": 0.8137510418891907,
"logps/chosen": -249.04348754882812,
"logps/rejected": -713.5521240234375,
"loss": 0.1934,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.15099892020225525,
"rewards/margins": 4.111520290374756,
"rewards/rejected": -4.262519359588623,
"step": 230
},
{
"epoch": 0.78,
"grad_norm": 30.245012228652286,
"learning_rate": 6.775784314464716e-08,
"logits/chosen": -0.3785732388496399,
"logits/rejected": 1.2067172527313232,
"logps/chosen": -247.9203338623047,
"logps/rejected": -789.9756469726562,
"loss": 0.2121,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.31377214193344116,
"rewards/margins": 4.594644546508789,
"rewards/rejected": -4.908417701721191,
"step": 240
},
{
"epoch": 0.82,
"grad_norm": 16.348157142642147,
"learning_rate": 4.943762331835621e-08,
"logits/chosen": -0.47478023171424866,
"logits/rejected": 1.148115873336792,
"logps/chosen": -280.11419677734375,
"logps/rejected": -811.0616455078125,
"loss": 0.1801,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.2764636278152466,
"rewards/margins": 4.763599395751953,
"rewards/rejected": -5.040062427520752,
"step": 250
},
{
"epoch": 0.85,
"grad_norm": 35.57913356579002,
"learning_rate": 3.373204079273473e-08,
"logits/chosen": -0.3408397138118744,
"logits/rejected": 1.2824140787124634,
"logps/chosen": -244.1248321533203,
"logps/rejected": -754.9166259765625,
"loss": 0.2096,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.23486094176769257,
"rewards/margins": 4.0956854820251465,
"rewards/rejected": -4.3305463790893555,
"step": 260
},
{
"epoch": 0.88,
"grad_norm": 22.72907844280726,
"learning_rate": 2.084584185459709e-08,
"logits/chosen": -0.27476102113723755,
"logits/rejected": 1.213181495666504,
"logps/chosen": -285.0547790527344,
"logps/rejected": -713.6915283203125,
"loss": 0.1874,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.21095514297485352,
"rewards/margins": 4.196396827697754,
"rewards/rejected": -4.407351493835449,
"step": 270
},
{
"epoch": 0.92,
"grad_norm": 22.905550437426676,
"learning_rate": 1.0947017814003257e-08,
"logits/chosen": 0.07453560829162598,
"logits/rejected": 1.153464913368225,
"logps/chosen": -254.3527374267578,
"logps/rejected": -810.6804809570312,
"loss": 0.1795,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.2759082615375519,
"rewards/margins": 4.727480411529541,
"rewards/rejected": -5.003388404846191,
"step": 280
},
{
"epoch": 0.95,
"grad_norm": 27.524134609799987,
"learning_rate": 4.164614980622677e-09,
"logits/chosen": -0.3413197100162506,
"logits/rejected": 1.2102278470993042,
"logps/chosen": -236.2258758544922,
"logps/rejected": -899.1121215820312,
"loss": 0.1885,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.2925792336463928,
"rewards/margins": 5.496912956237793,
"rewards/rejected": -5.789492130279541,
"step": 290
},
{
"epoch": 0.98,
"grad_norm": 20.178235335586955,
"learning_rate": 5.870523477368439e-10,
"logits/chosen": 0.39968985319137573,
"logits/rejected": 0.5744360089302063,
"logps/chosen": -251.2977752685547,
"logps/rejected": -803.5077514648438,
"loss": 0.1708,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.24827781319618225,
"rewards/margins": 4.899833679199219,
"rewards/rejected": -5.148111343383789,
"step": 300
},
{
"epoch": 0.98,
"eval_logits/chosen": -1.3915222883224487,
"eval_logits/rejected": 1.0055533647537231,
"eval_logps/chosen": -385.5208435058594,
"eval_logps/rejected": -374.23699951171875,
"eval_loss": 0.534546434879303,
"eval_rewards/accuracies": 0.6527777910232544,
"eval_rewards/chosen": -0.8741780519485474,
"eval_rewards/margins": 0.9209451675415039,
"eval_rewards/rejected": -1.7951232194900513,
"eval_runtime": 60.8637,
"eval_samples_per_second": 9.332,
"eval_steps_per_second": 0.296,
"step": 300
},
{
"epoch": 1.0,
"step": 306,
"total_flos": 0.0,
"train_loss": 0.004050073670405967,
"train_runtime": 90.6565,
"train_samples_per_second": 215.892,
"train_steps_per_second": 3.375
}
],
"logging_steps": 10,
"max_steps": 306,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}