|
{ |
|
"best_metric": 6.621464252471924, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-100/checkpoint-700", |
|
"epoch": 2.69248937175248, |
|
"eval_steps": 50, |
|
"global_step": 950, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 18442.54709688657, |
|
"learning_rate": 2.840909090909091e-08, |
|
"logits": -1.359458565711975, |
|
"logps": -84.69721221923828, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6931471824645996, |
|
"step": 1, |
|
"wo_beta": 5.271125316619873 |
|
}, |
|
{ |
|
"dpo_loss": 16.993518829345703, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 20039.682390740076, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.4496889114379883, |
|
"logps": -84.4629898071289, |
|
"loss": 17.3723, |
|
"objective": 16.993518829345703, |
|
"ranking_idealized": 0.5221088528633118, |
|
"ranking_idealized_expo": 0.5216836929321289, |
|
"ranking_simple": 0.5242347121238708, |
|
"regularize": 16.993518829345703, |
|
"step": 50, |
|
"wo_beta": 7.0965657234191895 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 31.485410690307617, |
|
"eval_logits": -1.4391247034072876, |
|
"eval_logps": -90.95195770263672, |
|
"eval_loss": 32.11253356933594, |
|
"eval_objective": 31.485410690307617, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5263975262641907, |
|
"eval_regularize": 31.485410690307617, |
|
"eval_runtime": 309.3922, |
|
"eval_samples_per_second": 18.714, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 7.685111999511719, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 59.36994934082031, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 16985.157671530273, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.4624311923980713, |
|
"logps": -81.97602844238281, |
|
"loss": 60.4454, |
|
"objective": 59.36994934082031, |
|
"ranking_idealized": 0.5137500166893005, |
|
"ranking_idealized_expo": 0.5137500166893005, |
|
"ranking_simple": 0.5433333516120911, |
|
"regularize": 59.36994934082031, |
|
"step": 100, |
|
"wo_beta": 6.49465799331665 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 70.77193450927734, |
|
"eval_logits": -1.4385725259780884, |
|
"eval_logps": -86.5999984741211, |
|
"eval_loss": 70.99683380126953, |
|
"eval_objective": 70.77193450927734, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5305383205413818, |
|
"eval_regularize": 70.77193450927734, |
|
"eval_runtime": 308.6565, |
|
"eval_samples_per_second": 18.759, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 7.528871059417725, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 101.89765930175781, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 14194.684745919038, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.386507511138916, |
|
"logps": -78.6588134765625, |
|
"loss": 100.2237, |
|
"objective": 101.89765930175781, |
|
"ranking_idealized": 0.527916669845581, |
|
"ranking_idealized_expo": 0.527916669845581, |
|
"ranking_simple": 0.5612499713897705, |
|
"regularize": 101.89765930175781, |
|
"step": 150, |
|
"wo_beta": 6.392862319946289 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 126.88445281982422, |
|
"eval_logits": -1.2892017364501953, |
|
"eval_logps": -85.73033905029297, |
|
"eval_loss": 129.89280700683594, |
|
"eval_objective": 126.88445281982422, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 126.88445281982422, |
|
"eval_runtime": 308.7582, |
|
"eval_samples_per_second": 18.753, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 7.464066505432129, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 119.16841125488281, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 12833.152797223363, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.3282432556152344, |
|
"logps": -75.89098358154297, |
|
"loss": 120.8284, |
|
"objective": 119.16841125488281, |
|
"ranking_idealized": 0.51541668176651, |
|
"ranking_idealized_expo": 0.51541668176651, |
|
"ranking_simple": 0.5562499761581421, |
|
"regularize": 119.16841125488281, |
|
"step": 200, |
|
"wo_beta": 6.29520320892334 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 159.50131225585938, |
|
"eval_logits": -1.3194636106491089, |
|
"eval_logps": -75.5542221069336, |
|
"eval_loss": 164.0151824951172, |
|
"eval_objective": 159.50131225585938, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5357142686843872, |
|
"eval_regularize": 159.50131225585938, |
|
"eval_runtime": 308.6776, |
|
"eval_samples_per_second": 18.757, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 7.183592319488525, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 134.0004119873047, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 13188.097817084132, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -1.138743281364441, |
|
"logps": -72.49617004394531, |
|
"loss": 134.8217, |
|
"objective": 134.0004119873047, |
|
"ranking_idealized": 0.5162500143051147, |
|
"ranking_idealized_expo": 0.5162500143051147, |
|
"ranking_simple": 0.5795833468437195, |
|
"regularize": 134.0004119873047, |
|
"step": 250, |
|
"wo_beta": 6.357597827911377 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 190.8510284423828, |
|
"eval_logits": -1.2058484554290771, |
|
"eval_logps": -79.3890609741211, |
|
"eval_loss": 195.72117614746094, |
|
"eval_objective": 190.8510284423828, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5284678936004639, |
|
"eval_regularize": 190.8510284423828, |
|
"eval_runtime": 308.7239, |
|
"eval_samples_per_second": 18.755, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 7.27105188369751, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 114.66310119628906, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 8558.083204689276, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -1.090308666229248, |
|
"logps": -76.0093765258789, |
|
"loss": 119.0273, |
|
"objective": 114.66310119628906, |
|
"ranking_idealized": 0.4950000047683716, |
|
"ranking_idealized_expo": 0.4950000047683716, |
|
"ranking_simple": 0.5687500238418579, |
|
"regularize": 114.66310119628906, |
|
"step": 300, |
|
"wo_beta": 6.086948871612549 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 188.05804443359375, |
|
"eval_logits": -0.994479775428772, |
|
"eval_logps": -84.29710388183594, |
|
"eval_loss": 192.52310180664062, |
|
"eval_objective": 188.05804443359375, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5357142686843872, |
|
"eval_regularize": 188.05804443359375, |
|
"eval_runtime": 308.8243, |
|
"eval_samples_per_second": 18.749, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 6.938241958618164, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 116.39104461669922, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 14112.952284083942, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.9387365579605103, |
|
"logps": -79.04769897460938, |
|
"loss": 114.0792, |
|
"objective": 116.39104461669922, |
|
"ranking_idealized": 0.5254166722297668, |
|
"ranking_idealized_expo": 0.5254166722297668, |
|
"ranking_simple": 0.5975000262260437, |
|
"regularize": 116.39104461669922, |
|
"step": 350, |
|
"wo_beta": 5.450421333312988 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 192.3919677734375, |
|
"eval_logits": -1.0045075416564941, |
|
"eval_logps": -82.11246490478516, |
|
"eval_loss": 205.77967834472656, |
|
"eval_objective": 192.3919677734375, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_regularize": 192.3919677734375, |
|
"eval_runtime": 308.4486, |
|
"eval_samples_per_second": 18.771, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 6.923487663269043, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 73.98444366455078, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 7172.9062533852075, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -0.8197985887527466, |
|
"logps": -76.91300201416016, |
|
"loss": 72.4145, |
|
"objective": 73.98444366455078, |
|
"ranking_idealized": 0.5320833325386047, |
|
"ranking_idealized_expo": 0.5320833325386047, |
|
"ranking_simple": 0.621666669845581, |
|
"regularize": 73.98444366455078, |
|
"step": 400, |
|
"wo_beta": 4.771157264709473 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 204.8121795654297, |
|
"eval_logits": -0.7119522094726562, |
|
"eval_logps": -82.81564331054688, |
|
"eval_loss": 212.66128540039062, |
|
"eval_objective": 204.8121795654297, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_regularize": 204.8121795654297, |
|
"eval_runtime": 308.465, |
|
"eval_samples_per_second": 18.77, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 7.0484819412231445, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 81.11451721191406, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 7257.334900105363, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.6066682934761047, |
|
"logps": -78.56853485107422, |
|
"loss": 76.9668, |
|
"objective": 81.11451721191406, |
|
"ranking_idealized": 0.5191666483879089, |
|
"ranking_idealized_expo": 0.5191666483879089, |
|
"ranking_simple": 0.6295833587646484, |
|
"regularize": 81.11451721191406, |
|
"step": 450, |
|
"wo_beta": 5.272921562194824 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 203.026123046875, |
|
"eval_logits": -0.7807133197784424, |
|
"eval_logps": -82.4189682006836, |
|
"eval_loss": 210.22909545898438, |
|
"eval_objective": 203.026123046875, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_regularize": 203.026123046875, |
|
"eval_runtime": 309.148, |
|
"eval_samples_per_second": 18.729, |
|
"eval_steps_per_second": 1.562, |
|
"eval_wo_beta": 6.924361705780029, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 76.8044204711914, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 8969.209638335737, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.6817887425422668, |
|
"logps": -77.68151092529297, |
|
"loss": 77.9261, |
|
"objective": 76.8044204711914, |
|
"ranking_idealized": 0.4970833361148834, |
|
"ranking_idealized_expo": 0.4970833361148834, |
|
"ranking_simple": 0.6170833110809326, |
|
"regularize": 76.8044204711914, |
|
"step": 500, |
|
"wo_beta": 5.202561855316162 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 202.15687561035156, |
|
"eval_logits": -0.7438479661941528, |
|
"eval_logps": -81.37281036376953, |
|
"eval_loss": 211.31556701660156, |
|
"eval_objective": 202.15687561035156, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 202.15687561035156, |
|
"eval_runtime": 308.8567, |
|
"eval_samples_per_second": 18.747, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 6.886315822601318, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 70.9330062866211, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 8040.484078292184, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.636380136013031, |
|
"logps": -77.03202056884766, |
|
"loss": 70.5755, |
|
"objective": 70.9330062866211, |
|
"ranking_idealized": 0.5104166865348816, |
|
"ranking_idealized_expo": 0.5104166865348816, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 70.9330062866211, |
|
"step": 550, |
|
"wo_beta": 4.849801063537598 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 200.1410369873047, |
|
"eval_logits": -0.6838412880897522, |
|
"eval_logps": -82.32962799072266, |
|
"eval_loss": 212.6468048095703, |
|
"eval_objective": 200.1410369873047, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5429606437683105, |
|
"eval_regularize": 200.1410369873047, |
|
"eval_runtime": 311.5527, |
|
"eval_samples_per_second": 18.584, |
|
"eval_steps_per_second": 1.55, |
|
"eval_wo_beta": 6.724142551422119, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 65.89614868164062, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 7186.548213944491, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.510294497013092, |
|
"logps": -78.14505004882812, |
|
"loss": 69.6026, |
|
"objective": 65.89614868164062, |
|
"ranking_idealized": 0.5149999856948853, |
|
"ranking_idealized_expo": 0.5149999856948853, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 65.89614868164062, |
|
"step": 600, |
|
"wo_beta": 4.862266540527344 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 196.96690368652344, |
|
"eval_logits": -0.5568598508834839, |
|
"eval_logps": -80.71286010742188, |
|
"eval_loss": 212.02540588378906, |
|
"eval_objective": 196.96690368652344, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5419254899024963, |
|
"eval_regularize": 196.96690368652344, |
|
"eval_runtime": 308.7949, |
|
"eval_samples_per_second": 18.75, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 6.6975417137146, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 69.19684600830078, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 7556.588020373631, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.5819191336631775, |
|
"logps": -75.77037048339844, |
|
"loss": 69.7829, |
|
"objective": 69.19684600830078, |
|
"ranking_idealized": 0.527916669845581, |
|
"ranking_idealized_expo": 0.527916669845581, |
|
"ranking_simple": 0.6395833492279053, |
|
"regularize": 69.19684600830078, |
|
"step": 650, |
|
"wo_beta": 5.048923969268799 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 209.67823791503906, |
|
"eval_logits": -0.7061935663223267, |
|
"eval_logps": -79.49684143066406, |
|
"eval_loss": 222.27664184570312, |
|
"eval_objective": 209.67823791503906, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5403726696968079, |
|
"eval_regularize": 209.67823791503906, |
|
"eval_runtime": 308.514, |
|
"eval_samples_per_second": 18.767, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 6.654106140136719, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 60.93992233276367, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 7674.886013172465, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.5295735597610474, |
|
"logps": -76.11310577392578, |
|
"loss": 62.7864, |
|
"objective": 60.93992233276367, |
|
"ranking_idealized": 0.5112500190734863, |
|
"ranking_idealized_expo": 0.5108333230018616, |
|
"ranking_simple": 0.6387500166893005, |
|
"regularize": 60.93992233276367, |
|
"step": 700, |
|
"wo_beta": 4.921832084655762 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 213.3031463623047, |
|
"eval_logits": -0.6268911957740784, |
|
"eval_logps": -80.2667236328125, |
|
"eval_loss": 226.3468017578125, |
|
"eval_objective": 213.3031463623047, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5398550629615784, |
|
"eval_regularize": 213.3031463623047, |
|
"eval_runtime": 312.7433, |
|
"eval_samples_per_second": 18.514, |
|
"eval_steps_per_second": 1.544, |
|
"eval_wo_beta": 6.621464252471924, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 37.5257682800293, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 6013.443827898748, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.507068932056427, |
|
"logps": -76.31243133544922, |
|
"loss": 37.3326, |
|
"objective": 37.5257682800293, |
|
"ranking_idealized": 0.5112500190734863, |
|
"ranking_idealized_expo": 0.5112500190734863, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 37.5257682800293, |
|
"step": 750, |
|
"wo_beta": 4.600498676300049 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 208.872314453125, |
|
"eval_logits": -0.7007076144218445, |
|
"eval_logps": -80.56648254394531, |
|
"eval_loss": 219.77853393554688, |
|
"eval_objective": 208.872314453125, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5439958572387695, |
|
"eval_regularize": 208.872314453125, |
|
"eval_runtime": 308.6702, |
|
"eval_samples_per_second": 18.758, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 6.726495265960693, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 32.680686950683594, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 5530.199901679942, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.5285750031471252, |
|
"logps": -77.53636932373047, |
|
"loss": 33.2099, |
|
"objective": 32.680686950683594, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6654166579246521, |
|
"regularize": 32.680686950683594, |
|
"step": 800, |
|
"wo_beta": 4.228292465209961 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 207.68812561035156, |
|
"eval_logits": -0.5673097968101501, |
|
"eval_logps": -81.89009857177734, |
|
"eval_loss": 221.87864685058594, |
|
"eval_objective": 207.68812561035156, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5450310707092285, |
|
"eval_regularize": 207.68812561035156, |
|
"eval_runtime": 308.8922, |
|
"eval_samples_per_second": 18.744, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 6.671654224395752, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 34.45829772949219, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 4586.247966907105, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.49624624848365784, |
|
"logps": -79.02293395996094, |
|
"loss": 33.915, |
|
"objective": 34.45829772949219, |
|
"ranking_idealized": 0.5141666531562805, |
|
"ranking_idealized_expo": 0.5133333206176758, |
|
"ranking_simple": 0.6575000286102295, |
|
"regularize": 34.45829772949219, |
|
"step": 850, |
|
"wo_beta": 4.579156398773193 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 205.05154418945312, |
|
"eval_logits": -0.5177903771400452, |
|
"eval_logps": -81.91341400146484, |
|
"eval_loss": 217.6955108642578, |
|
"eval_objective": 205.05154418945312, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.542443037033081, |
|
"eval_regularize": 205.05154418945312, |
|
"eval_runtime": 308.6493, |
|
"eval_samples_per_second": 18.759, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 6.724928855895996, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 39.46757888793945, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 5711.491250646742, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.44471094012260437, |
|
"logps": -76.97750091552734, |
|
"loss": 35.3572, |
|
"objective": 39.46757888793945, |
|
"ranking_idealized": 0.5183333158493042, |
|
"ranking_idealized_expo": 0.5179166793823242, |
|
"ranking_simple": 0.6600000262260437, |
|
"regularize": 39.46757888793945, |
|
"step": 900, |
|
"wo_beta": 4.633584022521973 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 214.5051727294922, |
|
"eval_logits": -0.47291529178619385, |
|
"eval_logps": -81.58795166015625, |
|
"eval_loss": 224.5401611328125, |
|
"eval_objective": 214.5051727294922, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.54347825050354, |
|
"eval_regularize": 214.5051727294922, |
|
"eval_runtime": 308.5271, |
|
"eval_samples_per_second": 18.767, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 6.827796936035156, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 30.270259857177734, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 4986.6651394423525, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.46939340233802795, |
|
"logps": -76.20037841796875, |
|
"loss": 31.032, |
|
"objective": 30.270259857177734, |
|
"ranking_idealized": 0.5058333277702332, |
|
"ranking_idealized_expo": 0.5058333277702332, |
|
"ranking_simple": 0.6645833253860474, |
|
"regularize": 30.270259857177734, |
|
"step": 950, |
|
"wo_beta": 4.50759220123291 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 216.58029174804688, |
|
"eval_logits": -0.55417799949646, |
|
"eval_logps": -80.34796905517578, |
|
"eval_loss": 225.29067993164062, |
|
"eval_objective": 216.58029174804688, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5419254899024963, |
|
"eval_regularize": 216.58029174804688, |
|
"eval_runtime": 309.3192, |
|
"eval_samples_per_second": 18.719, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 6.8429484367370605, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"step": 950, |
|
"total_flos": 0.0, |
|
"train_loss": 70.38767440143384, |
|
"train_runtime": 26138.5983, |
|
"train_samples_per_second": 9.718, |
|
"train_steps_per_second": 0.067 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|