|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1061.4292758130512, |
|
"learning_rate": 3.125e-09, |
|
"logits/chosen": -3.9499800205230713, |
|
"logits/rejected": -4.237819194793701, |
|
"logps/chosen": -300.693115234375, |
|
"logps/rejected": -249.96307373046875, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1100.0352396547798, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -4.129236221313477, |
|
"logits/rejected": -4.351477146148682, |
|
"logps/chosen": -351.48150634765625, |
|
"logps/rejected": -308.8130187988281, |
|
"loss": 0.7282, |
|
"rewards/accuracies": 0.3611111044883728, |
|
"rewards/chosen": -0.027542730793356895, |
|
"rewards/margins": -0.10096076130867004, |
|
"rewards/rejected": 0.073418028652668, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1023.2979189429582, |
|
"learning_rate": 4.9899357349880975e-08, |
|
"logits/chosen": -4.195385932922363, |
|
"logits/rejected": -4.382458686828613, |
|
"logps/chosen": -334.9761962890625, |
|
"logps/rejected": -293.8690185546875, |
|
"loss": 0.6994, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.18794658780097961, |
|
"rewards/margins": 0.09464438259601593, |
|
"rewards/rejected": 0.09330219030380249, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 716.4790257172086, |
|
"learning_rate": 4.877641290737884e-08, |
|
"logits/chosen": -4.231568813323975, |
|
"logits/rejected": -4.364924430847168, |
|
"logps/chosen": -327.92413330078125, |
|
"logps/rejected": -295.37176513671875, |
|
"loss": 0.6055, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.8660896420478821, |
|
"rewards/margins": 0.425116628408432, |
|
"rewards/rejected": 0.44097304344177246, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 820.682467230606, |
|
"learning_rate": 4.646121984004665e-08, |
|
"logits/chosen": -4.1439104080200195, |
|
"logits/rejected": -4.345718860626221, |
|
"logps/chosen": -330.7722473144531, |
|
"logps/rejected": -288.60272216796875, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.6021575927734375, |
|
"rewards/margins": 0.956769585609436, |
|
"rewards/rejected": 0.6453880071640015, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 587.6344375884266, |
|
"learning_rate": 4.3069871595684784e-08, |
|
"logits/chosen": -4.235655784606934, |
|
"logits/rejected": -4.4143476486206055, |
|
"logps/chosen": -330.8318786621094, |
|
"logps/rejected": -291.75067138671875, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 2.010468006134033, |
|
"rewards/margins": 1.2047243118286133, |
|
"rewards/rejected": 0.8057435750961304, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 586.1895231882175, |
|
"learning_rate": 3.8772424536302564e-08, |
|
"logits/chosen": -4.247714519500732, |
|
"logits/rejected": -4.417937278747559, |
|
"logps/chosen": -322.380126953125, |
|
"logps/rejected": -291.76654052734375, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": 2.19193696975708, |
|
"rewards/margins": 1.311265230178833, |
|
"rewards/rejected": 0.8806716203689575, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 528.8947001003231, |
|
"learning_rate": 3.378437060203357e-08, |
|
"logits/chosen": -4.170197486877441, |
|
"logits/rejected": -4.359009742736816, |
|
"logps/chosen": -322.4336242675781, |
|
"logps/rejected": -289.197998046875, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": 2.203657865524292, |
|
"rewards/margins": 1.3729288578033447, |
|
"rewards/rejected": 0.8307291269302368, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 544.8408089721645, |
|
"learning_rate": 2.8355831645441387e-08, |
|
"logits/chosen": -4.038945198059082, |
|
"logits/rejected": -4.326292991638184, |
|
"logps/chosen": -348.65472412109375, |
|
"logps/rejected": -308.1936950683594, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.3145015239715576, |
|
"rewards/margins": 1.5040271282196045, |
|
"rewards/rejected": 0.8104745149612427, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 526.2097671297138, |
|
"learning_rate": 2.2759017277414164e-08, |
|
"logits/chosen": -4.170694351196289, |
|
"logits/rejected": -4.379502296447754, |
|
"logps/chosen": -335.43402099609375, |
|
"logps/rejected": -295.8087463378906, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": 1.9496158361434937, |
|
"rewards/margins": 1.3254307508468628, |
|
"rewards/rejected": 0.6241849660873413, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 595.4206000749634, |
|
"learning_rate": 1.7274575140626317e-08, |
|
"logits/chosen": -4.159605979919434, |
|
"logits/rejected": -4.377494812011719, |
|
"logps/chosen": -333.1622314453125, |
|
"logps/rejected": -286.3451232910156, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": 2.0654988288879395, |
|
"rewards/margins": 1.4395055770874023, |
|
"rewards/rejected": 0.6259931921958923, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 528.6243658840883, |
|
"learning_rate": 1.217751806485235e-08, |
|
"logits/chosen": -4.138543128967285, |
|
"logits/rejected": -4.379323959350586, |
|
"logps/chosen": -314.89556884765625, |
|
"logps/rejected": -277.0162658691406, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": 2.0163025856018066, |
|
"rewards/margins": 1.3896347284317017, |
|
"rewards/rejected": 0.6266676783561707, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 557.9046270518194, |
|
"learning_rate": 7.723433775328384e-09, |
|
"logits/chosen": -4.136676788330078, |
|
"logits/rejected": -4.345128536224365, |
|
"logps/chosen": -328.94482421875, |
|
"logps/rejected": -281.4302673339844, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.9894015789031982, |
|
"rewards/margins": 1.363441824913025, |
|
"rewards/rejected": 0.625959575176239, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 523.8022952711281, |
|
"learning_rate": 4.135668656967433e-09, |
|
"logits/chosen": -4.222664833068848, |
|
"logits/rejected": -4.383507251739502, |
|
"logps/chosen": -334.4266357421875, |
|
"logps/rejected": -287.63067626953125, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 2.026325225830078, |
|
"rewards/margins": 1.3963022232055664, |
|
"rewards/rejected": 0.6300228834152222, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 601.0204054485596, |
|
"learning_rate": 1.5941282340065698e-09, |
|
"logits/chosen": -4.176892280578613, |
|
"logits/rejected": -4.3910746574401855, |
|
"logps/chosen": -335.8815612792969, |
|
"logps/rejected": -304.35723876953125, |
|
"loss": 0.5101, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.9034252166748047, |
|
"rewards/margins": 1.3076425790786743, |
|
"rewards/rejected": 0.5957827568054199, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 496.1092972793449, |
|
"learning_rate": 2.262559558016325e-10, |
|
"logits/chosen": -4.114329814910889, |
|
"logits/rejected": -4.342306137084961, |
|
"logps/chosen": -342.4503479003906, |
|
"logps/rejected": -296.07086181640625, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": 2.0278801918029785, |
|
"rewards/margins": 1.378021001815796, |
|
"rewards/rejected": 0.6498591303825378, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5437000210468586, |
|
"train_runtime": 4622.4032, |
|
"train_samples_per_second": 8.641, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|