|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5625e-07, |
|
"logits/chosen": -2.7731900215148926, |
|
"logits/rejected": -2.6362287998199463, |
|
"logps/chosen": -356.1260070800781, |
|
"logps/rejected": -311.3892822265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -2.3915464878082275, |
|
"logits/rejected": -2.3424172401428223, |
|
"logps/chosen": -243.08827209472656, |
|
"logps/rejected": -240.88124084472656, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.004824994597584009, |
|
"rewards/margins": 0.001562346238642931, |
|
"rewards/rejected": 0.003262649057433009, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.6855998039245605, |
|
"logits/rejected": -2.503112316131592, |
|
"logps/chosen": -276.1568908691406, |
|
"logps/rejected": -245.57150268554688, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.04376252368092537, |
|
"rewards/margins": 0.011996113695204258, |
|
"rewards/rejected": 0.03176640719175339, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -2.5015687942504883, |
|
"logits/rejected": -2.448686122894287, |
|
"logps/chosen": -244.99642944335938, |
|
"logps/rejected": -249.10916137695312, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05081823468208313, |
|
"rewards/margins": 0.018069546669721603, |
|
"rewards/rejected": 0.032748688012361526, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": -2.5158021450042725, |
|
"logits/rejected": -2.337573289871216, |
|
"logps/chosen": -272.7821350097656, |
|
"logps/rejected": -227.36007690429688, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0395994558930397, |
|
"rewards/margins": 0.04740050435066223, |
|
"rewards/rejected": -0.00780104985460639, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.949188496058089e-06, |
|
"logits/chosen": -2.430145502090454, |
|
"logits/rejected": -2.4263150691986084, |
|
"logps/chosen": -249.273681640625, |
|
"logps/rejected": -266.5956726074219, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.02035255916416645, |
|
"rewards/margins": 0.059511054307222366, |
|
"rewards/rejected": -0.03915848955512047, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": -2.4893181324005127, |
|
"logits/rejected": -2.418604612350464, |
|
"logps/chosen": -301.8047790527344, |
|
"logps/rejected": -252.42892456054688, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.06781601160764694, |
|
"rewards/margins": 0.07723621279001236, |
|
"rewards/rejected": -0.1450522094964981, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7761938666470405e-06, |
|
"logits/chosen": -2.4578957557678223, |
|
"logits/rejected": -2.4078097343444824, |
|
"logps/chosen": -259.1146545410156, |
|
"logps/rejected": -255.2762908935547, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09445185959339142, |
|
"rewards/margins": 0.15250881016254425, |
|
"rewards/rejected": -0.24696068465709686, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -2.5219717025756836, |
|
"logits/rejected": -2.3697924613952637, |
|
"logps/chosen": -289.8721618652344, |
|
"logps/rejected": -306.9769287109375, |
|
"loss": 0.6357, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.21299012005329132, |
|
"rewards/margins": 0.16702046990394592, |
|
"rewards/rejected": -0.38001060485839844, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4890613722044526e-06, |
|
"logits/chosen": -2.427962064743042, |
|
"logits/rejected": -2.326305866241455, |
|
"logps/chosen": -265.3756408691406, |
|
"logps/rejected": -262.7252197265625, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16604574024677277, |
|
"rewards/margins": 0.19639183580875397, |
|
"rewards/rejected": -0.36243754625320435, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3069871595684795e-06, |
|
"logits/chosen": -2.213723659515381, |
|
"logits/rejected": -2.217102527618408, |
|
"logps/chosen": -245.6179962158203, |
|
"logps/rejected": -293.12518310546875, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.38440248370170593, |
|
"rewards/margins": 0.126637801527977, |
|
"rewards/rejected": -0.5110402703285217, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -2.3665878772735596, |
|
"eval_logits/rejected": -2.256598949432373, |
|
"eval_logps/chosen": -287.2168884277344, |
|
"eval_logps/rejected": -285.49847412109375, |
|
"eval_loss": 0.6268974542617798, |
|
"eval_rewards/accuracies": 0.6819999814033508, |
|
"eval_rewards/chosen": -0.23765824735164642, |
|
"eval_rewards/margins": 0.2054254114627838, |
|
"eval_rewards/rejected": -0.44308364391326904, |
|
"eval_runtime": 543.2957, |
|
"eval_samples_per_second": 3.681, |
|
"eval_steps_per_second": 0.46, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.102189034962561e-06, |
|
"logits/chosen": -2.338050603866577, |
|
"logits/rejected": -2.2199347019195557, |
|
"logps/chosen": -304.7019958496094, |
|
"logps/rejected": -283.5575256347656, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.18706437945365906, |
|
"rewards/margins": 0.24619019031524658, |
|
"rewards/rejected": -0.43325456976890564, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": -2.199939250946045, |
|
"logits/rejected": -2.1462173461914062, |
|
"logps/chosen": -280.5738525390625, |
|
"logps/rejected": -272.75537109375, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.23016035556793213, |
|
"rewards/margins": 0.2529276907444, |
|
"rewards/rejected": -0.48308807611465454, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.634976249348867e-06, |
|
"logits/chosen": -2.4285922050476074, |
|
"logits/rejected": -2.252119541168213, |
|
"logps/chosen": -337.8984375, |
|
"logps/rejected": -329.4248962402344, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.27586501836776733, |
|
"rewards/margins": 0.285078763961792, |
|
"rewards/rejected": -0.5609437823295593, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3784370602033572e-06, |
|
"logits/chosen": -2.072373628616333, |
|
"logits/rejected": -1.9053455591201782, |
|
"logps/chosen": -251.76571655273438, |
|
"logps/rejected": -285.0694885253906, |
|
"loss": 0.6067, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5010747909545898, |
|
"rewards/margins": 0.26867786049842834, |
|
"rewards/rejected": -0.7697526216506958, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1108510153447352e-06, |
|
"logits/chosen": -2.21221661567688, |
|
"logits/rejected": -2.136280059814453, |
|
"logps/chosen": -338.2016296386719, |
|
"logps/rejected": -331.0526428222656, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.639680027961731, |
|
"rewards/margins": 0.2550516426563263, |
|
"rewards/rejected": -0.8947317004203796, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -2.2209646701812744, |
|
"logits/rejected": -2.022948980331421, |
|
"logps/chosen": -377.3534851074219, |
|
"logps/rejected": -344.77252197265625, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7194479703903198, |
|
"rewards/margins": 0.39620086550712585, |
|
"rewards/rejected": -1.115648865699768, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.556095160739513e-06, |
|
"logits/chosen": -2.1350314617156982, |
|
"logits/rejected": -1.85476553440094, |
|
"logps/chosen": -351.29638671875, |
|
"logps/rejected": -354.8650817871094, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7903974652290344, |
|
"rewards/margins": 0.24958536028862, |
|
"rewards/rejected": -1.039982795715332, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2759017277414165e-06, |
|
"logits/chosen": -2.0943400859832764, |
|
"logits/rejected": -1.8893616199493408, |
|
"logps/chosen": -322.147216796875, |
|
"logps/rejected": -327.81304931640625, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6768954992294312, |
|
"rewards/margins": 0.20395174622535706, |
|
"rewards/rejected": -0.8808472752571106, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9985264605418185e-06, |
|
"logits/chosen": -1.9419981241226196, |
|
"logits/rejected": -1.7324016094207764, |
|
"logps/chosen": -328.23760986328125, |
|
"logps/rejected": -314.13922119140625, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4452829360961914, |
|
"rewards/margins": 0.4189114570617676, |
|
"rewards/rejected": -0.864194393157959, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -2.144902229309082, |
|
"logits/rejected": -1.7156444787979126, |
|
"logps/chosen": -362.327880859375, |
|
"logps/rejected": -322.9747619628906, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4486660957336426, |
|
"rewards/margins": 0.3436250388622284, |
|
"rewards/rejected": -0.7922911047935486, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -1.893760323524475, |
|
"eval_logits/rejected": -1.6871448755264282, |
|
"eval_logps/chosen": -322.544189453125, |
|
"eval_logps/rejected": -337.0687255859375, |
|
"eval_loss": 0.5820500254631042, |
|
"eval_rewards/accuracies": 0.7059999704360962, |
|
"eval_rewards/chosen": -0.5909315943717957, |
|
"eval_rewards/margins": 0.3678547739982605, |
|
"eval_rewards/rejected": -0.9587863683700562, |
|
"eval_runtime": 543.1459, |
|
"eval_samples_per_second": 3.682, |
|
"eval_steps_per_second": 0.46, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.466103737583699e-06, |
|
"logits/chosen": -1.8559290170669556, |
|
"logits/rejected": -1.7014697790145874, |
|
"logps/chosen": -324.19256591796875, |
|
"logps/rejected": -352.70697021484375, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6294658780097961, |
|
"rewards/margins": 0.45733365416526794, |
|
"rewards/rejected": -1.0867995023727417, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.217751806485235e-06, |
|
"logits/chosen": -1.8568174839019775, |
|
"logits/rejected": -1.6362855434417725, |
|
"logps/chosen": -356.0939636230469, |
|
"logps/rejected": -389.1434326171875, |
|
"loss": 0.5765, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6206706762313843, |
|
"rewards/margins": 0.5877247452735901, |
|
"rewards/rejected": -1.2083956003189087, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.855248903979505e-07, |
|
"logits/chosen": -1.9677798748016357, |
|
"logits/rejected": -1.8001766204833984, |
|
"logps/chosen": -333.63409423828125, |
|
"logps/rejected": -372.82232666015625, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6273213028907776, |
|
"rewards/margins": 0.4957484304904938, |
|
"rewards/rejected": -1.1230696439743042, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -1.6269299983978271, |
|
"logits/rejected": -1.5314247608184814, |
|
"logps/chosen": -343.7135314941406, |
|
"logps/rejected": -360.920166015625, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6133186221122742, |
|
"rewards/margins": 0.3855026662349701, |
|
"rewards/rejected": -0.9988213777542114, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.808881491049723e-07, |
|
"logits/chosen": -1.8092960119247437, |
|
"logits/rejected": -1.4363586902618408, |
|
"logps/chosen": -302.46234130859375, |
|
"logps/rejected": -305.09393310546875, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7107473611831665, |
|
"rewards/margins": 0.3086285889148712, |
|
"rewards/rejected": -1.0193760395050049, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1356686569674344e-07, |
|
"logits/chosen": -2.0522053241729736, |
|
"logits/rejected": -1.6467043161392212, |
|
"logps/chosen": -370.14019775390625, |
|
"logps/rejected": -350.79364013671875, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6428462266921997, |
|
"rewards/margins": 0.41562938690185547, |
|
"rewards/rejected": -1.0584756135940552, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7248368952908055e-07, |
|
"logits/chosen": -1.736702561378479, |
|
"logits/rejected": -1.5174537897109985, |
|
"logps/chosen": -293.9969177246094, |
|
"logps/rejected": -317.34844970703125, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5701481103897095, |
|
"rewards/margins": 0.500605583190918, |
|
"rewards/rejected": -1.0707536935806274, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": -1.6159837245941162, |
|
"logits/rejected": -1.2289941310882568, |
|
"logps/chosen": -342.9421081542969, |
|
"logps/rejected": -372.6164855957031, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7729519605636597, |
|
"rewards/margins": 0.49912238121032715, |
|
"rewards/rejected": -1.2720743417739868, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.577619905828281e-08, |
|
"logits/chosen": -1.502423644065857, |
|
"logits/rejected": -1.456081509590149, |
|
"logps/chosen": -329.8805236816406, |
|
"logps/rejected": -356.34417724609375, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.678324818611145, |
|
"rewards/margins": 0.42007485032081604, |
|
"rewards/rejected": -1.0983997583389282, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.262559558016325e-08, |
|
"logits/chosen": -1.6769917011260986, |
|
"logits/rejected": -1.406165361404419, |
|
"logps/chosen": -328.3318786621094, |
|
"logps/rejected": -353.73968505859375, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6493935585021973, |
|
"rewards/margins": 0.43068727850914, |
|
"rewards/rejected": -1.0800807476043701, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -1.7015434503555298, |
|
"eval_logits/rejected": -1.4598934650421143, |
|
"eval_logps/chosen": -331.1508483886719, |
|
"eval_logps/rejected": -351.8941955566406, |
|
"eval_loss": 0.5735270977020264, |
|
"eval_rewards/accuracies": 0.6940000057220459, |
|
"eval_rewards/chosen": -0.6769981980323792, |
|
"eval_rewards/margins": 0.4300425946712494, |
|
"eval_rewards/rejected": -1.1070406436920166, |
|
"eval_runtime": 542.9185, |
|
"eval_samples_per_second": 3.684, |
|
"eval_steps_per_second": 0.46, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.294126437336734e-10, |
|
"logits/chosen": -1.7649977207183838, |
|
"logits/rejected": -1.521240234375, |
|
"logps/chosen": -326.1722717285156, |
|
"logps/rejected": -356.2889709472656, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6321894526481628, |
|
"rewards/margins": 0.4693359434604645, |
|
"rewards/rejected": -1.1015253067016602, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6116275028922619, |
|
"train_runtime": 6907.8509, |
|
"train_samples_per_second": 1.448, |
|
"train_steps_per_second": 0.045 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|