{ "best_metric": 14.269521713256836, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L2EXPO-ES-0.1-W0/checkpoint-850", "epoch": 3.119981105337742, "eval_steps": 50, "global_step": 1100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 17829.928247930075, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 169.5214, "objective": 153.4677734375, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.3618059456348419, "step": 1, "wo_beta": 14.830931663513184 }, { "dpo_loss": 0.6812583208084106, "epoch": 0.14170996693434104, "grad_norm": 16211.46107075598, "learning_rate": 1.4204545454545458e-06, "logits": -1.4696459770202637, "logps": -84.0085220336914, "loss": 176.8183, "objective": 176.67568969726562, "ranking_idealized": 0.608418345451355, "ranking_idealized_expo": 0.5229591727256775, "ranking_simple": 0.5229591727256775, "regularize": 0.40743452310562134, "step": 50, "wo_beta": 15.723442077636719 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6874236464500427, "eval_logits": -1.4744971990585327, "eval_logps": -93.32908630371094, "eval_loss": 185.6754150390625, "eval_objective": 183.7364959716797, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.4177533686161041, "eval_runtime": 313.3628, "eval_samples_per_second": 18.477, "eval_steps_per_second": 1.541, "eval_wo_beta": 16.450912475585938, "step": 50 }, { "dpo_loss": 0.6344443559646606, "epoch": 0.2834199338686821, "grad_norm": 13549.385454159787, "learning_rate": 2.8409090909090916e-06, "logits": -1.4799116849899292, "logps": -83.19428253173828, "loss": 168.1755, "objective": 170.85800170898438, "ranking_idealized": 0.6016666889190674, "ranking_idealized_expo": 0.5141666531562805, "ranking_simple": 0.5462499856948853, "regularize": 0.3876563012599945, "step": 100, "wo_beta": 15.583632469177246 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6818934679031372, "eval_logits": -1.4487266540527344, "eval_logps": -93.78287506103516, "eval_loss": 195.25457763671875, "eval_objective": 190.24264526367188, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 0.4319508373737335, "eval_runtime": 308.147, "eval_samples_per_second": 18.79, "eval_steps_per_second": 1.567, "eval_wo_beta": 16.292007446289062, "step": 100 }, { "dpo_loss": 0.6211874485015869, "epoch": 0.42512990080302315, "grad_norm": 11468.6742630064, "learning_rate": 4.2613636363636365e-06, "logits": -1.3584699630737305, "logps": -84.20758056640625, "loss": 182.7148, "objective": 183.446044921875, "ranking_idealized": 0.6066666841506958, "ranking_idealized_expo": 0.5287500023841858, "ranking_simple": 0.5704166889190674, "regularize": 0.408563494682312, "step": 150, "wo_beta": 15.468416213989258 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.696851909160614, "eval_logits": -1.2730469703674316, "eval_logps": -89.73287200927734, "eval_loss": 218.5299072265625, "eval_objective": 213.48837280273438, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.4859408438205719, "eval_runtime": 310.345, "eval_samples_per_second": 18.657, "eval_steps_per_second": 1.556, "eval_wo_beta": 15.804472923278809, "step": 150 }, { "dpo_loss": 0.6188575029373169, "epoch": 0.5668398677373642, "grad_norm": 9821.481225782692, "learning_rate": 4.997168347957521e-06, "logits": -1.112230896949768, "logps": -78.28816223144531, "loss": 203.0993, "objective": 197.34710693359375, "ranking_idealized": 0.5924999713897705, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5529166460037231, "regularize": 0.43580684065818787, "step": 200, "wo_beta": 15.601910591125488 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.7050652503967285, "eval_logits": -1.0446792840957642, "eval_logps": -79.70624542236328, "eval_loss": 251.92431640625, "eval_objective": 242.6405792236328, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.5518472790718079, "eval_runtime": 309.0635, "eval_samples_per_second": 18.734, "eval_steps_per_second": 1.563, "eval_wo_beta": 14.694934844970703, "step": 200 }, { "dpo_loss": 0.6134654879570007, "epoch": 0.7085498346717053, "grad_norm": 9089.685589774383, "learning_rate": 4.973122855144066e-06, "logits": -0.831710934638977, "logps": -75.95425415039062, "loss": 207.5481, "objective": 209.42929077148438, "ranking_idealized": 0.5991666913032532, "ranking_idealized_expo": 0.5170833468437195, "ranking_simple": 0.5600000023841858, "regularize": 0.46070510149002075, "step": 250, "wo_beta": 15.946144104003906 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.7055123448371887, "eval_logits": -1.0362330675125122, "eval_logps": -80.09400177001953, "eval_loss": 251.99049377441406, "eval_objective": 244.35101318359375, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5305383205413818, "eval_regularize": 0.5541524887084961, "eval_runtime": 308.2828, "eval_samples_per_second": 18.781, "eval_steps_per_second": 1.567, "eval_wo_beta": 14.815776824951172, "step": 250 }, { "dpo_loss": 0.6043393015861511, "epoch": 0.8502598016060463, "grad_norm": 8539.808891034761, "learning_rate": 4.924776641419513e-06, "logits": -0.7953253388404846, "logps": -75.85888671875, "loss": 193.4843, "objective": 194.79408264160156, "ranking_idealized": 0.5799999833106995, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.5475000143051147, "regularize": 0.43644893169403076, "step": 300, "wo_beta": 15.487529754638672 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.7149869799613953, "eval_logits": -0.7137008905410767, "eval_logps": -80.42964172363281, "eval_loss": 266.710693359375, "eval_objective": 258.3957214355469, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5289855003356934, "eval_regularize": 0.5880751013755798, "eval_runtime": 308.0939, "eval_samples_per_second": 18.793, "eval_steps_per_second": 1.568, "eval_wo_beta": 14.543147087097168, "step": 300 }, { "dpo_loss": 0.5903122425079346, "epoch": 0.9919697685403873, "grad_norm": 7990.460628234389, "learning_rate": 4.8526047530778175e-06, "logits": -0.5379437208175659, "logps": -75.28119659423828, "loss": 182.6922, "objective": 183.501220703125, "ranking_idealized": 0.60916668176651, "ranking_idealized_expo": 0.5270833373069763, "ranking_simple": 0.5887500047683716, "regularize": 0.4042646884918213, "step": 350, "wo_beta": 15.643413543701172 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.7073115706443787, "eval_logits": -0.6448346972465515, "eval_logps": -76.36376190185547, "eval_loss": 262.3345947265625, "eval_objective": 254.63600158691406, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5357142686843872, "eval_regularize": 0.5802178978919983, "eval_runtime": 307.6386, "eval_samples_per_second": 18.821, "eval_steps_per_second": 1.57, "eval_wo_beta": 14.617557525634766, "step": 350 }, { "dpo_loss": 0.5762209892272949, "epoch": 1.1336797354747283, "grad_norm": 8197.08929637491, "learning_rate": 4.757316345716554e-06, "logits": -0.5033857226371765, "logps": -73.0711441040039, "loss": 166.9683, "objective": 169.2793426513672, "ranking_idealized": 0.6087499856948853, "ranking_idealized_expo": 0.5337499976158142, "ranking_simple": 0.5870833396911621, "regularize": 0.3724302351474762, "step": 400, "wo_beta": 15.946381568908691 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.7151855230331421, "eval_logits": -0.6391506791114807, "eval_logps": -78.34819793701172, "eval_loss": 272.3287658691406, "eval_objective": 264.91107177734375, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.6056078672409058, "eval_runtime": 308.6832, "eval_samples_per_second": 18.757, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.651263236999512, "step": 400 }, { "dpo_loss": 0.5697375535964966, "epoch": 1.2753897024090695, "grad_norm": 7987.082885512546, "learning_rate": 4.639847716126855e-06, "logits": -0.49218180775642395, "logps": -75.01708984375, "loss": 155.9364, "objective": 154.53318786621094, "ranking_idealized": 0.5975000262260437, "ranking_idealized_expo": 0.5199999809265137, "ranking_simple": 0.5849999785423279, "regularize": 0.34387490153312683, "step": 450, "wo_beta": 16.392471313476562 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.7185649871826172, "eval_logits": -0.4207253158092499, "eval_logps": -80.52300262451172, "eval_loss": 275.0490417480469, "eval_objective": 268.8637390136719, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 0.6128751039505005, "eval_runtime": 308.3205, "eval_samples_per_second": 18.779, "eval_steps_per_second": 1.567, "eval_wo_beta": 14.77772331237793, "step": 450 }, { "dpo_loss": 0.5621860027313232, "epoch": 1.4170996693434104, "grad_norm": 8470.747695660142, "learning_rate": 4.501353102310901e-06, "logits": -0.3975617587566376, "logps": -76.15510559082031, "loss": 143.4724, "objective": 144.99127197265625, "ranking_idealized": 0.57833331823349, "ranking_idealized_expo": 0.4983333349227905, "ranking_simple": 0.5641666650772095, "regularize": 0.3229096531867981, "step": 500, "wo_beta": 15.915486335754395 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.720949113368988, "eval_logits": -0.5140562653541565, "eval_logps": -80.55872344970703, "eval_loss": 275.96630859375, "eval_objective": 270.0382995605469, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 0.6150393486022949, "eval_runtime": 308.3699, "eval_samples_per_second": 18.776, "eval_steps_per_second": 1.566, "eval_wo_beta": 14.436367988586426, "step": 500 }, { "dpo_loss": 0.5593892931938171, "epoch": 1.5588096362777515, "grad_norm": 8086.109814509309, "learning_rate": 4.34319334202531e-06, "logits": -0.5436981320381165, "logps": -76.949462890625, "loss": 141.3444, "objective": 144.28431701660156, "ranking_idealized": 0.5945833325386047, "ranking_idealized_expo": 0.5116666555404663, "ranking_simple": 0.5841666460037231, "regularize": 0.3211959898471832, "step": 550, "wo_beta": 15.494115829467773 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.7139093279838562, "eval_logits": -0.6337844729423523, "eval_logps": -81.12705993652344, "eval_loss": 275.0850524902344, "eval_objective": 269.2188720703125, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5377846956253052, "eval_regularize": 0.6158860325813293, "eval_runtime": 309.36, "eval_samples_per_second": 18.716, "eval_steps_per_second": 1.561, "eval_wo_beta": 14.642486572265625, "step": 550 }, { "dpo_loss": 0.5621269345283508, "epoch": 1.7028814359943316, "grad_norm": 7358.036337966711, "learning_rate": 4.16692250129073e-06, "logits": -0.5125311613082886, "logps": -76.2679443359375, "loss": 136.172, "objective": 138.94873046875, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.51583331823349, "ranking_simple": 0.5874999761581421, "regularize": 0.304604709148407, "step": 600, "wo_beta": 15.486053466796875 }, { "epoch": 1.7028814359943316, "eval_dpo_loss": 0.711104691028595, "eval_logits": -0.5857469439506531, "eval_logps": -79.42213439941406, "eval_loss": 273.6680908203125, "eval_objective": 264.6509704589844, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5372670888900757, "eval_regularize": 0.6011682748794556, "eval_runtime": 311.317, "eval_samples_per_second": 18.598, "eval_steps_per_second": 1.551, "eval_wo_beta": 14.563067436218262, "step": 600 }, { "dpo_loss": 0.5565243363380432, "epoch": 1.8445914029286725, "grad_norm": 7716.658918543234, "learning_rate": 3.974272604254906e-06, "logits": -0.45386597514152527, "logps": -76.72352600097656, "loss": 130.7133, "objective": 137.35403442382812, "ranking_idealized": 0.6075000166893005, "ranking_idealized_expo": 0.5308333039283752, "ranking_simple": 0.5920833349227905, "regularize": 0.2999296486377716, "step": 650, "wo_beta": 16.43378448486328 }, { "epoch": 1.8445914029286725, "eval_dpo_loss": 0.7193200588226318, "eval_logits": -0.4215196371078491, "eval_logps": -80.21295166015625, "eval_loss": 276.3609313964844, "eval_objective": 269.6939392089844, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 0.614128828048706, "eval_runtime": 309.0561, "eval_samples_per_second": 18.734, "eval_steps_per_second": 1.563, "eval_wo_beta": 14.545560836791992, "step": 650 }, { "dpo_loss": 0.5614636540412903, "epoch": 1.9863013698630136, "grad_norm": 6878.133616740314, "learning_rate": 3.767136614452458e-06, "logits": -0.4337880313396454, "logps": -76.46927642822266, "loss": 122.624, "objective": 121.14984130859375, "ranking_idealized": 0.5933333039283752, "ranking_idealized_expo": 0.5104166865348816, "ranking_simple": 0.5766666531562805, "regularize": 0.27313005924224854, "step": 700, "wo_beta": 15.872406005859375 }, { "epoch": 1.9863013698630136, "eval_dpo_loss": 0.717802107334137, "eval_logits": -0.5263403058052063, "eval_logps": -80.99676513671875, "eval_loss": 278.4689636230469, "eval_objective": 271.4756774902344, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5377846956253052, "eval_regularize": 0.6189925074577332, "eval_runtime": 308.2823, "eval_samples_per_second": 18.781, "eval_steps_per_second": 1.567, "eval_wo_beta": 14.466358184814453, "step": 700 }, { "dpo_loss": 0.5429660677909851, "epoch": 2.1280113367973548, "grad_norm": 7099.084874469886, "learning_rate": 3.547549834686222e-06, "logits": -0.456377774477005, "logps": -78.5710678100586, "loss": 108.7022, "objective": 107.37686157226562, "ranking_idealized": 0.6087499856948853, "ranking_idealized_expo": 0.5170833468437195, "ranking_simple": 0.5899999737739563, "regularize": 0.2386901080608368, "step": 750, "wo_beta": 16.154056549072266 }, { "epoch": 2.1280113367973548, "eval_dpo_loss": 0.7206873297691345, "eval_logits": -0.4656512439250946, "eval_logps": -84.00879669189453, "eval_loss": 282.56683349609375, "eval_objective": 276.0200500488281, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.534679114818573, "eval_regularize": 0.6302040815353394, "eval_runtime": 308.3238, "eval_samples_per_second": 18.779, "eval_steps_per_second": 1.567, "eval_wo_beta": 14.451681137084961, "step": 750 }, { "dpo_loss": 0.5408411026000977, "epoch": 2.269721303731696, "grad_norm": 7050.1218576698175, "learning_rate": 3.3176699082935546e-06, "logits": -0.44169890880584717, "logps": -79.25837707519531, "loss": 104.1923, "objective": 101.39546966552734, "ranking_idealized": 0.5924999713897705, "ranking_idealized_expo": 0.5095833539962769, "ranking_simple": 0.5879166722297668, "regularize": 0.22695893049240112, "step": 800, "wo_beta": 15.370372772216797 }, { "epoch": 2.269721303731696, "eval_dpo_loss": 0.7166009545326233, "eval_logits": -0.46399185061454773, "eval_logps": -81.63129425048828, "eval_loss": 278.05548095703125, "eval_objective": 272.76220703125, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 0.6210389137268066, "eval_runtime": 316.0101, "eval_samples_per_second": 18.322, "eval_steps_per_second": 1.528, "eval_wo_beta": 14.43066692352295, "step": 800 }, { "dpo_loss": 0.5408244132995605, "epoch": 2.411431270666037, "grad_norm": 6732.756367827086, "learning_rate": 3.0797556183036582e-06, "logits": -0.398872435092926, "logps": -77.83311462402344, "loss": 99.0867, "objective": 97.56111145019531, "ranking_idealized": 0.5975000262260437, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5879166722297668, "regularize": 0.21405553817749023, "step": 850, "wo_beta": 15.753022193908691 }, { "epoch": 2.411431270666037, "eval_dpo_loss": 0.7208675742149353, "eval_logits": -0.5211770534515381, "eval_logps": -81.06893157958984, "eval_loss": 283.0077819824219, "eval_objective": 277.3703308105469, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5331262946128845, "eval_regularize": 0.6309527158737183, "eval_runtime": 308.4936, "eval_samples_per_second": 18.769, "eval_steps_per_second": 1.566, "eval_wo_beta": 14.269521713256836, "step": 850 }, { "dpo_loss": 0.5474214553833008, "epoch": 2.5531412376003777, "grad_norm": 6765.065529775399, "learning_rate": 2.8361446928038298e-06, "logits": -0.45377472043037415, "logps": -77.1721420288086, "loss": 91.7475, "objective": 90.64097595214844, "ranking_idealized": 0.5887500047683716, "ranking_idealized_expo": 0.5191666483879089, "ranking_simple": 0.5808333158493042, "regularize": 0.20561054348945618, "step": 900, "wo_beta": 16.313594818115234 }, { "epoch": 2.5531412376003777, "eval_dpo_loss": 0.7200337052345276, "eval_logits": -0.5149067044258118, "eval_logps": -81.61436462402344, "eval_loss": 279.6676025390625, "eval_objective": 275.1768798828125, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5372670888900757, "eval_regularize": 0.6278803944587708, "eval_runtime": 308.2613, "eval_samples_per_second": 18.783, "eval_steps_per_second": 1.567, "eval_wo_beta": 14.35700798034668, "step": 900 }, { "dpo_loss": 0.536076545715332, "epoch": 2.694851204534719, "grad_norm": 6780.927379854703, "learning_rate": 2.5892308345974517e-06, "logits": -0.3948451280593872, "logps": -78.59798431396484, "loss": 87.8681, "objective": 91.65276336669922, "ranking_idealized": 0.5983333587646484, "ranking_idealized_expo": 0.5087500214576721, "ranking_simple": 0.5895833373069763, "regularize": 0.1982572227716446, "step": 950, "wo_beta": 16.053390502929688 }, { "epoch": 2.694851204534719, "eval_dpo_loss": 0.7190758585929871, "eval_logits": -0.44280436635017395, "eval_logps": -81.85442352294922, "eval_loss": 281.5718078613281, "eval_objective": 275.7560119628906, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 0.6276679039001465, "eval_runtime": 308.5458, "eval_samples_per_second": 18.765, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.350942611694336, "step": 950 }, { "dpo_loss": 0.5394951105117798, "epoch": 2.83656117146906, "grad_norm": 7326.8730595182415, "learning_rate": 2.341440200858589e-06, "logits": -0.389419287443161, "logps": -77.48452758789062, "loss": 81.742, "objective": 80.39604187011719, "ranking_idealized": 0.6016666889190674, "ranking_idealized_expo": 0.5099999904632568, "ranking_simple": 0.5883333086967468, "regularize": 0.18344084918498993, "step": 1000, "wo_beta": 15.534008979797363 }, { "epoch": 2.83656117146906, "eval_dpo_loss": 0.7197140455245972, "eval_logits": -0.49511978030204773, "eval_logps": -81.44123077392578, "eval_loss": 279.1323547363281, "eval_objective": 274.5647277832031, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.6257451176643372, "eval_runtime": 309.1776, "eval_samples_per_second": 18.727, "eval_steps_per_second": 1.562, "eval_wo_beta": 14.355087280273438, "step": 1000 }, { "dpo_loss": 0.5420763492584229, "epoch": 2.978271138403401, "grad_norm": 6821.455863031989, "learning_rate": 2.0952075638923656e-06, "logits": -0.4298148453235626, "logps": -78.06723022460938, "loss": 76.4372, "objective": 76.12059783935547, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5958333611488342, "regularize": 0.1691569834947586, "step": 1050, "wo_beta": 16.169239044189453 }, { "epoch": 2.978271138403401, "eval_dpo_loss": 0.7184009552001953, "eval_logits": -0.4501994550228119, "eval_logps": -82.39596557617188, "eval_loss": 279.18841552734375, "eval_objective": 273.9026184082031, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.6249070167541504, "eval_runtime": 310.2594, "eval_samples_per_second": 18.662, "eval_steps_per_second": 1.557, "eval_wo_beta": 14.32027816772461, "step": 1050 }, { "dpo_loss": 0.5327512621879578, "epoch": 3.119981105337742, "grad_norm": 6539.491492464407, "learning_rate": 1.852952387243698e-06, "logits": -0.39219775795936584, "logps": -78.88080596923828, "loss": 67.4698, "objective": 66.67189025878906, "ranking_idealized": 0.6045833230018616, "ranking_idealized_expo": 0.528333306312561, "ranking_simple": 0.6029166579246521, "regularize": 0.14756560325622559, "step": 1100, "wo_beta": 16.35073471069336 }, { "epoch": 3.119981105337742, "eval_dpo_loss": 0.7169165015220642, "eval_logits": -0.41896748542785645, "eval_logps": -82.91073608398438, "eval_loss": 280.53173828125, "eval_objective": 274.793212890625, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.6259706616401672, "eval_runtime": 310.4871, "eval_samples_per_second": 18.648, "eval_steps_per_second": 1.556, "eval_wo_beta": 14.341768264770508, "step": 1100 }, { "epoch": 3.119981105337742, "step": 1100, "total_flos": 0.0, "train_loss": 50.30704811789773, "train_runtime": 15116.5529, "train_samples_per_second": 16.803, "train_steps_per_second": 0.116 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }