{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 100,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.5625e-07,
"logits/chosen": -2.7731900215148926,
"logits/rejected": -2.6362287998199463,
"logps/chosen": -356.1260070800781,
"logps/rejected": -311.3892822265625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.5625e-06,
"logits/chosen": -2.3915464878082275,
"logits/rejected": -2.3424172401428223,
"logps/chosen": -243.08827209472656,
"logps/rejected": -240.88124084472656,
"loss": 0.6927,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.004824994597584009,
"rewards/margins": 0.001562346238642931,
"rewards/rejected": 0.003262649057433009,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 3.125e-06,
"logits/chosen": -2.6855998039245605,
"logits/rejected": -2.503112316131592,
"logps/chosen": -276.1568908691406,
"logps/rejected": -245.57150268554688,
"loss": 0.687,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.04376252368092537,
"rewards/margins": 0.011996113695204258,
"rewards/rejected": 0.03176640719175339,
"step": 20
},
{
"epoch": 0.1,
"learning_rate": 4.6875000000000004e-06,
"logits/chosen": -2.5015687942504883,
"logits/rejected": -2.448686122894287,
"logps/chosen": -244.99642944335938,
"logps/rejected": -249.10916137695312,
"loss": 0.6832,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.05081823468208313,
"rewards/margins": 0.018069546669721603,
"rewards/rejected": 0.032748688012361526,
"step": 30
},
{
"epoch": 0.13,
"learning_rate": 4.989935734988098e-06,
"logits/chosen": -2.5158021450042725,
"logits/rejected": -2.337573289871216,
"logps/chosen": -272.7821350097656,
"logps/rejected": -227.36007690429688,
"loss": 0.6668,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.0395994558930397,
"rewards/margins": 0.04740050435066223,
"rewards/rejected": -0.00780104985460639,
"step": 40
},
{
"epoch": 0.16,
"learning_rate": 4.949188496058089e-06,
"logits/chosen": -2.430145502090454,
"logits/rejected": -2.4263150691986084,
"logps/chosen": -249.273681640625,
"logps/rejected": -266.5956726074219,
"loss": 0.6528,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.02035255916416645,
"rewards/margins": 0.059511054307222366,
"rewards/rejected": -0.03915848955512047,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 4.8776412907378845e-06,
"logits/chosen": -2.4893181324005127,
"logits/rejected": -2.418604612350464,
"logps/chosen": -301.8047790527344,
"logps/rejected": -252.42892456054688,
"loss": 0.6591,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.06781601160764694,
"rewards/margins": 0.07723621279001236,
"rewards/rejected": -0.1450522094964981,
"step": 60
},
{
"epoch": 0.22,
"learning_rate": 4.7761938666470405e-06,
"logits/chosen": -2.4578957557678223,
"logits/rejected": -2.4078097343444824,
"logps/chosen": -259.1146545410156,
"logps/rejected": -255.2762908935547,
"loss": 0.6412,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.09445185959339142,
"rewards/margins": 0.15250881016254425,
"rewards/rejected": -0.24696068465709686,
"step": 70
},
{
"epoch": 0.26,
"learning_rate": 4.646121984004666e-06,
"logits/chosen": -2.5219717025756836,
"logits/rejected": -2.3697924613952637,
"logps/chosen": -289.8721618652344,
"logps/rejected": -306.9769287109375,
"loss": 0.6357,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.21299012005329132,
"rewards/margins": 0.16702046990394592,
"rewards/rejected": -0.38001060485839844,
"step": 80
},
{
"epoch": 0.29,
"learning_rate": 4.4890613722044526e-06,
"logits/chosen": -2.427962064743042,
"logits/rejected": -2.326305866241455,
"logps/chosen": -265.3756408691406,
"logps/rejected": -262.7252197265625,
"loss": 0.6272,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.16604574024677277,
"rewards/margins": 0.19639183580875397,
"rewards/rejected": -0.36243754625320435,
"step": 90
},
{
"epoch": 0.32,
"learning_rate": 4.3069871595684795e-06,
"logits/chosen": -2.213723659515381,
"logits/rejected": -2.217102527618408,
"logps/chosen": -245.6179962158203,
"logps/rejected": -293.12518310546875,
"loss": 0.6269,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.38440248370170593,
"rewards/margins": 0.126637801527977,
"rewards/rejected": -0.5110402703285217,
"step": 100
},
{
"epoch": 0.32,
"eval_logits/chosen": -2.3665878772735596,
"eval_logits/rejected": -2.256598949432373,
"eval_logps/chosen": -287.2168884277344,
"eval_logps/rejected": -285.49847412109375,
"eval_loss": 0.6268974542617798,
"eval_rewards/accuracies": 0.6819999814033508,
"eval_rewards/chosen": -0.23765824735164642,
"eval_rewards/margins": 0.2054254114627838,
"eval_rewards/rejected": -0.44308364391326904,
"eval_runtime": 543.2957,
"eval_samples_per_second": 3.681,
"eval_steps_per_second": 0.46,
"step": 100
},
{
"epoch": 0.35,
"learning_rate": 4.102189034962561e-06,
"logits/chosen": -2.338050603866577,
"logits/rejected": -2.2199347019195557,
"logps/chosen": -304.7019958496094,
"logps/rejected": -283.5575256347656,
"loss": 0.6232,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.18706437945365906,
"rewards/margins": 0.24619019031524658,
"rewards/rejected": -0.43325456976890564,
"step": 110
},
{
"epoch": 0.38,
"learning_rate": 3.8772424536302565e-06,
"logits/chosen": -2.199939250946045,
"logits/rejected": -2.1462173461914062,
"logps/chosen": -280.5738525390625,
"logps/rejected": -272.75537109375,
"loss": 0.6256,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.23016035556793213,
"rewards/margins": 0.2529276907444,
"rewards/rejected": -0.48308807611465454,
"step": 120
},
{
"epoch": 0.42,
"learning_rate": 3.634976249348867e-06,
"logits/chosen": -2.4285922050476074,
"logits/rejected": -2.252119541168213,
"logps/chosen": -337.8984375,
"logps/rejected": -329.4248962402344,
"loss": 0.6299,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.27586501836776733,
"rewards/margins": 0.285078763961792,
"rewards/rejected": -0.5609437823295593,
"step": 130
},
{
"epoch": 0.45,
"learning_rate": 3.3784370602033572e-06,
"logits/chosen": -2.072373628616333,
"logits/rejected": -1.9053455591201782,
"logps/chosen": -251.76571655273438,
"logps/rejected": -285.0694885253906,
"loss": 0.6067,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.5010747909545898,
"rewards/margins": 0.26867786049842834,
"rewards/rejected": -0.7697526216506958,
"step": 140
},
{
"epoch": 0.48,
"learning_rate": 3.1108510153447352e-06,
"logits/chosen": -2.21221661567688,
"logits/rejected": -2.136280059814453,
"logps/chosen": -338.2016296386719,
"logps/rejected": -331.0526428222656,
"loss": 0.608,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.639680027961731,
"rewards/margins": 0.2550516426563263,
"rewards/rejected": -0.8947317004203796,
"step": 150
},
{
"epoch": 0.51,
"learning_rate": 2.835583164544139e-06,
"logits/chosen": -2.2209646701812744,
"logits/rejected": -2.022948980331421,
"logps/chosen": -377.3534851074219,
"logps/rejected": -344.77252197265625,
"loss": 0.5937,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.7194479703903198,
"rewards/margins": 0.39620086550712585,
"rewards/rejected": -1.115648865699768,
"step": 160
},
{
"epoch": 0.54,
"learning_rate": 2.556095160739513e-06,
"logits/chosen": -2.1350314617156982,
"logits/rejected": -1.85476553440094,
"logps/chosen": -351.29638671875,
"logps/rejected": -354.8650817871094,
"loss": 0.6069,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.7903974652290344,
"rewards/margins": 0.24958536028862,
"rewards/rejected": -1.039982795715332,
"step": 170
},
{
"epoch": 0.58,
"learning_rate": 2.2759017277414165e-06,
"logits/chosen": -2.0943400859832764,
"logits/rejected": -1.8893616199493408,
"logps/chosen": -322.147216796875,
"logps/rejected": -327.81304931640625,
"loss": 0.6252,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.6768954992294312,
"rewards/margins": 0.20395174622535706,
"rewards/rejected": -0.8808472752571106,
"step": 180
},
{
"epoch": 0.61,
"learning_rate": 1.9985264605418185e-06,
"logits/chosen": -1.9419981241226196,
"logits/rejected": -1.7324016094207764,
"logps/chosen": -328.23760986328125,
"logps/rejected": -314.13922119140625,
"loss": 0.584,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.4452829360961914,
"rewards/margins": 0.4189114570617676,
"rewards/rejected": -0.864194393157959,
"step": 190
},
{
"epoch": 0.64,
"learning_rate": 1.7274575140626318e-06,
"logits/chosen": -2.144902229309082,
"logits/rejected": -1.7156444787979126,
"logps/chosen": -362.327880859375,
"logps/rejected": -322.9747619628906,
"loss": 0.6332,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.4486660957336426,
"rewards/margins": 0.3436250388622284,
"rewards/rejected": -0.7922911047935486,
"step": 200
},
{
"epoch": 0.64,
"eval_logits/chosen": -1.893760323524475,
"eval_logits/rejected": -1.6871448755264282,
"eval_logps/chosen": -322.544189453125,
"eval_logps/rejected": -337.0687255859375,
"eval_loss": 0.5820500254631042,
"eval_rewards/accuracies": 0.7059999704360962,
"eval_rewards/chosen": -0.5909315943717957,
"eval_rewards/margins": 0.3678547739982605,
"eval_rewards/rejected": -0.9587863683700562,
"eval_runtime": 543.1459,
"eval_samples_per_second": 3.682,
"eval_steps_per_second": 0.46,
"step": 200
},
{
"epoch": 0.67,
"learning_rate": 1.466103737583699e-06,
"logits/chosen": -1.8559290170669556,
"logits/rejected": -1.7014697790145874,
"logps/chosen": -324.19256591796875,
"logps/rejected": -352.70697021484375,
"loss": 0.552,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.6294658780097961,
"rewards/margins": 0.45733365416526794,
"rewards/rejected": -1.0867995023727417,
"step": 210
},
{
"epoch": 0.7,
"learning_rate": 1.217751806485235e-06,
"logits/chosen": -1.8568174839019775,
"logits/rejected": -1.6362855434417725,
"logps/chosen": -356.0939636230469,
"logps/rejected": -389.1434326171875,
"loss": 0.5765,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.6206706762313843,
"rewards/margins": 0.5877247452735901,
"rewards/rejected": -1.2083956003189087,
"step": 220
},
{
"epoch": 0.74,
"learning_rate": 9.855248903979505e-07,
"logits/chosen": -1.9677798748016357,
"logits/rejected": -1.8001766204833984,
"logps/chosen": -333.63409423828125,
"logps/rejected": -372.82232666015625,
"loss": 0.5961,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.6273213028907776,
"rewards/margins": 0.4957484304904938,
"rewards/rejected": -1.1230696439743042,
"step": 230
},
{
"epoch": 0.77,
"learning_rate": 7.723433775328385e-07,
"logits/chosen": -1.6269299983978271,
"logits/rejected": -1.5314247608184814,
"logps/chosen": -343.7135314941406,
"logps/rejected": -360.920166015625,
"loss": 0.5733,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.6133186221122742,
"rewards/margins": 0.3855026662349701,
"rewards/rejected": -0.9988213777542114,
"step": 240
},
{
"epoch": 0.8,
"learning_rate": 5.808881491049723e-07,
"logits/chosen": -1.8092960119247437,
"logits/rejected": -1.4363586902618408,
"logps/chosen": -302.46234130859375,
"logps/rejected": -305.09393310546875,
"loss": 0.5822,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.7107473611831665,
"rewards/margins": 0.3086285889148712,
"rewards/rejected": -1.0193760395050049,
"step": 250
},
{
"epoch": 0.83,
"learning_rate": 4.1356686569674344e-07,
"logits/chosen": -2.0522053241729736,
"logits/rejected": -1.6467043161392212,
"logps/chosen": -370.14019775390625,
"logps/rejected": -350.79364013671875,
"loss": 0.5594,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.6428462266921997,
"rewards/margins": 0.41562938690185547,
"rewards/rejected": -1.0584756135940552,
"step": 260
},
{
"epoch": 0.86,
"learning_rate": 2.7248368952908055e-07,
"logits/chosen": -1.736702561378479,
"logits/rejected": -1.5174537897109985,
"logps/chosen": -293.9969177246094,
"logps/rejected": -317.34844970703125,
"loss": 0.5803,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.5701481103897095,
"rewards/margins": 0.500605583190918,
"rewards/rejected": -1.0707536935806274,
"step": 270
},
{
"epoch": 0.9,
"learning_rate": 1.59412823400657e-07,
"logits/chosen": -1.6159837245941162,
"logits/rejected": -1.2289941310882568,
"logps/chosen": -342.9421081542969,
"logps/rejected": -372.6164855957031,
"loss": 0.5578,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.7729519605636597,
"rewards/margins": 0.49912238121032715,
"rewards/rejected": -1.2720743417739868,
"step": 280
},
{
"epoch": 0.93,
"learning_rate": 7.577619905828281e-08,
"logits/chosen": -1.502423644065857,
"logits/rejected": -1.456081509590149,
"logps/chosen": -329.8805236816406,
"logps/rejected": -356.34417724609375,
"loss": 0.5827,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.678324818611145,
"rewards/margins": 0.42007485032081604,
"rewards/rejected": -1.0983997583389282,
"step": 290
},
{
"epoch": 0.96,
"learning_rate": 2.262559558016325e-08,
"logits/chosen": -1.6769917011260986,
"logits/rejected": -1.406165361404419,
"logps/chosen": -328.3318786621094,
"logps/rejected": -353.73968505859375,
"loss": 0.5648,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.6493935585021973,
"rewards/margins": 0.43068727850914,
"rewards/rejected": -1.0800807476043701,
"step": 300
},
{
"epoch": 0.96,
"eval_logits/chosen": -1.7015434503555298,
"eval_logits/rejected": -1.4598934650421143,
"eval_logps/chosen": -331.1508483886719,
"eval_logps/rejected": -351.8941955566406,
"eval_loss": 0.5735270977020264,
"eval_rewards/accuracies": 0.6940000057220459,
"eval_rewards/chosen": -0.6769981980323792,
"eval_rewards/margins": 0.4300425946712494,
"eval_rewards/rejected": -1.1070406436920166,
"eval_runtime": 542.9185,
"eval_samples_per_second": 3.684,
"eval_steps_per_second": 0.46,
"step": 300
},
{
"epoch": 0.99,
"learning_rate": 6.294126437336734e-10,
"logits/chosen": -1.7649977207183838,
"logits/rejected": -1.521240234375,
"logps/chosen": -326.1722717285156,
"logps/rejected": -356.2889709472656,
"loss": 0.5603,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.6321894526481628,
"rewards/margins": 0.4693359434604645,
"rewards/rejected": -1.1015253067016602,
"step": 310
},
{
"epoch": 1.0,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.6116275028922619,
"train_runtime": 6907.8509,
"train_samples_per_second": 1.448,
"train_steps_per_second": 0.045
}
],
"logging_steps": 10,
"max_steps": 312,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}