|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.984, |
|
"eval_steps": 100, |
|
"global_step": 124, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.846153846153847e-07, |
|
"logits/chosen": 0.02903342992067337, |
|
"logits/rejected": 0.16799500584602356, |
|
"logps/chosen": -204.7097930908203, |
|
"logps/rejected": -186.28207397460938, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 0.11909348517656326, |
|
"logits/rejected": 0.1480591893196106, |
|
"logps/chosen": -174.38600158691406, |
|
"logps/rejected": -139.39389038085938, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.3541666567325592, |
|
"rewards/chosen": 0.0004580159147735685, |
|
"rewards/margins": -5.351095023797825e-05, |
|
"rewards/rejected": 0.0005115267704240978, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.951096619903317e-06, |
|
"logits/chosen": 0.1915196031332016, |
|
"logits/rejected": 0.26785004138946533, |
|
"logps/chosen": -186.1024169921875, |
|
"logps/rejected": -150.16329956054688, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 8.462425466859713e-05, |
|
"rewards/margins": 0.00011474495113361627, |
|
"rewards/rejected": -3.012050910911057e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.716164218065246e-06, |
|
"logits/chosen": 0.09372388571500778, |
|
"logits/rejected": 0.09187857806682587, |
|
"logps/chosen": -189.90634155273438, |
|
"logps/rejected": -176.62911987304688, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.0012555012945085764, |
|
"rewards/margins": 0.0008926725131459534, |
|
"rewards/rejected": 0.0003628287522587925, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.3048902348863116e-06, |
|
"logits/chosen": 0.21573364734649658, |
|
"logits/rejected": 0.1305653154850006, |
|
"logps/chosen": -178.8751220703125, |
|
"logps/rejected": -151.7847137451172, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": 0.0010144033003598452, |
|
"rewards/margins": -0.0007725629839114845, |
|
"rewards/rejected": 0.0017869662260636687, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": 0.10782618820667267, |
|
"logits/rejected": 0.1629345715045929, |
|
"logps/chosen": -185.59564208984375, |
|
"logps/rejected": -174.7571563720703, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.00029209800413809717, |
|
"rewards/margins": -0.0012864455347880721, |
|
"rewards/rejected": 0.0009943475015461445, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.0956464785579125e-06, |
|
"logits/chosen": 0.19225239753723145, |
|
"logits/rejected": 0.18530502915382385, |
|
"logps/chosen": -194.72091674804688, |
|
"logps/rejected": -159.44725036621094, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.002120513701811433, |
|
"rewards/margins": 0.0013710735365748405, |
|
"rewards/rejected": 0.0007494401070289314, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.39389699200963e-06, |
|
"logits/chosen": 0.18958896398544312, |
|
"logits/rejected": 0.14928244054317474, |
|
"logps/chosen": -196.82078552246094, |
|
"logps/rejected": -172.227294921875, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.00045417825458571315, |
|
"rewards/margins": -0.0015611432027071714, |
|
"rewards/rejected": 0.0011069647734984756, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.700590188571887e-06, |
|
"logits/chosen": 0.15342679619789124, |
|
"logits/rejected": 0.13346508145332336, |
|
"logps/chosen": -176.46441650390625, |
|
"logps/rejected": -149.63717651367188, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": 0.0011017677607014775, |
|
"rewards/margins": 0.0006318273372016847, |
|
"rewards/rejected": 0.000469940627226606, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0708929268538034e-06, |
|
"logits/chosen": 0.19051842391490936, |
|
"logits/rejected": 0.15732800960540771, |
|
"logps/chosen": -183.8511199951172, |
|
"logps/rejected": -157.0377197265625, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.000780799426138401, |
|
"rewards/margins": -0.000656499934848398, |
|
"rewards/rejected": 0.0014372995356097817, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5.549106142039018e-07, |
|
"logits/chosen": 0.16213683784008026, |
|
"logits/rejected": 0.04842492565512657, |
|
"logps/chosen": -177.85955810546875, |
|
"logps/rejected": -152.51058959960938, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": 0.0025749087799340487, |
|
"rewards/margins": 0.0006177256000228226, |
|
"rewards/rejected": 0.001957183238118887, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/chosen": -0.007027674000710249, |
|
"eval_logits/rejected": 0.09070703387260437, |
|
"eval_logps/chosen": -306.4146728515625, |
|
"eval_logps/rejected": -278.6007995605469, |
|
"eval_loss": 0.010791419073939323, |
|
"eval_rewards/accuracies": 0.4830000102519989, |
|
"eval_rewards/chosen": 0.001202387735247612, |
|
"eval_rewards/margins": -0.00015190700651146472, |
|
"eval_rewards/rejected": 0.001354294829070568, |
|
"eval_runtime": 432.9902, |
|
"eval_samples_per_second": 4.619, |
|
"eval_steps_per_second": 1.155, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.937002879188285e-07, |
|
"logits/chosen": 0.22388437390327454, |
|
"logits/rejected": 0.19077368080615997, |
|
"logps/chosen": -205.4290008544922, |
|
"logps/rejected": -172.20669555664062, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.00022258120588958263, |
|
"rewards/margins": -0.00020028270955663174, |
|
"rewards/rejected": 0.0004228639299981296, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.6003680950742728e-08, |
|
"logits/chosen": 0.17824237048625946, |
|
"logits/rejected": 0.234628364443779, |
|
"logps/chosen": -184.6617431640625, |
|
"logps/rejected": -160.73251342773438, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.0013891400303691626, |
|
"rewards/margins": 9.483665053267032e-05, |
|
"rewards/rejected": 0.0012943033361807466, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"step": 124, |
|
"total_flos": 0.0, |
|
"train_loss": 0.010348274312432735, |
|
"train_runtime": 1408.9952, |
|
"train_samples_per_second": 1.419, |
|
"train_steps_per_second": 0.088 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 124, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|