|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -1.4567933082580566, |
|
"logits/rejected": -0.871229887008667, |
|
"logps/chosen": -244.365234375, |
|
"logps/rejected": -212.26486206054688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.83387291431427, |
|
"logits/rejected": -1.0804697275161743, |
|
"logps/chosen": -206.00912475585938, |
|
"logps/rejected": -202.784912109375, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.5486111044883728, |
|
"rewards/chosen": -0.039022047072649, |
|
"rewards/margins": 0.04178649187088013, |
|
"rewards/rejected": -0.08080853521823883, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": -1.0675297975540161, |
|
"logits/rejected": -0.5359733700752258, |
|
"logps/chosen": -237.27444458007812, |
|
"logps/rejected": -251.00753784179688, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.7787758111953735, |
|
"rewards/margins": 0.11565746366977692, |
|
"rewards/rejected": -0.8944332003593445, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.877641290737883e-07, |
|
"logits/chosen": -1.095473289489746, |
|
"logits/rejected": -0.37094515562057495, |
|
"logps/chosen": -244.32162475585938, |
|
"logps/rejected": -296.1733703613281, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6706979870796204, |
|
"rewards/margins": 0.5164287090301514, |
|
"rewards/rejected": -1.187126636505127, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -0.8634458780288696, |
|
"logits/rejected": 0.12595783174037933, |
|
"logps/chosen": -242.0459442138672, |
|
"logps/rejected": -296.41595458984375, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5677449703216553, |
|
"rewards/margins": 0.5976042747497559, |
|
"rewards/rejected": -1.1653492450714111, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"logits/chosen": -0.6954927444458008, |
|
"logits/rejected": 0.03154268115758896, |
|
"logps/chosen": -246.68258666992188, |
|
"logps/rejected": -295.62884521484375, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8166979551315308, |
|
"rewards/margins": 0.5098680257797241, |
|
"rewards/rejected": -1.3265659809112549, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": -0.768271267414093, |
|
"logits/rejected": 0.022685179486870766, |
|
"logps/chosen": -245.92782592773438, |
|
"logps/rejected": -300.2510681152344, |
|
"loss": 0.5887, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7170382738113403, |
|
"rewards/margins": 0.5133967399597168, |
|
"rewards/rejected": -1.2304350137710571, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.378437060203357e-07, |
|
"logits/chosen": -0.5168389081954956, |
|
"logits/rejected": 0.45852264761924744, |
|
"logps/chosen": -256.852294921875, |
|
"logps/rejected": -309.4953308105469, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9044780731201172, |
|
"rewards/margins": 0.5655065178871155, |
|
"rewards/rejected": -1.4699846506118774, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -0.3654092848300934, |
|
"logits/rejected": 0.10795004665851593, |
|
"logps/chosen": -251.9696502685547, |
|
"logps/rejected": -292.9334716796875, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7978931069374084, |
|
"rewards/margins": 0.38232654333114624, |
|
"rewards/rejected": -1.1802196502685547, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits/chosen": -0.8108726739883423, |
|
"logits/rejected": 0.14660978317260742, |
|
"logps/chosen": -273.36419677734375, |
|
"logps/rejected": -320.58209228515625, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6586915254592896, |
|
"rewards/margins": 0.6224299669265747, |
|
"rewards/rejected": -1.2811213731765747, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": -0.46979203820228577, |
|
"logits/rejected": 0.5494852066040039, |
|
"logps/chosen": -272.69427490234375, |
|
"logps/rejected": -317.7990417480469, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9002830386161804, |
|
"rewards/margins": 0.5319327116012573, |
|
"rewards/rejected": -1.432215690612793, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"logits/chosen": -0.3219306170940399, |
|
"logits/rejected": 0.26910799741744995, |
|
"logps/chosen": -251.5453338623047, |
|
"logps/rejected": -299.8834533691406, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7972058057785034, |
|
"rewards/margins": 0.43246564269065857, |
|
"rewards/rejected": -1.2296714782714844, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -0.37325382232666016, |
|
"logits/rejected": 0.5774334669113159, |
|
"logps/chosen": -233.79562377929688, |
|
"logps/rejected": -328.5582580566406, |
|
"loss": 0.5585, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6402639150619507, |
|
"rewards/margins": 0.7515830397605896, |
|
"rewards/rejected": -1.3918468952178955, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"logits/chosen": -0.3119003176689148, |
|
"logits/rejected": 0.8427650332450867, |
|
"logps/chosen": -233.98971557617188, |
|
"logps/rejected": -324.93316650390625, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7347938418388367, |
|
"rewards/margins": 0.7224765419960022, |
|
"rewards/rejected": -1.4572702646255493, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": -0.20903070271015167, |
|
"logits/rejected": 0.7928945422172546, |
|
"logps/chosen": -274.28704833984375, |
|
"logps/rejected": -331.6188049316406, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9145911931991577, |
|
"rewards/margins": 0.5992218255996704, |
|
"rewards/rejected": -1.5138130187988281, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"logits/chosen": 0.10685434192419052, |
|
"logits/rejected": 0.766906201839447, |
|
"logps/chosen": -257.482666015625, |
|
"logps/rejected": -326.8499450683594, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9127100706100464, |
|
"rewards/margins": 0.6432833075523376, |
|
"rewards/rejected": -1.5559935569763184, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5776262069359804, |
|
"train_runtime": 9474.0276, |
|
"train_samples_per_second": 2.111, |
|
"train_steps_per_second": 0.016 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|