gshf_iter_2 / trainer_state.json
YYYYYYibo's picture
Model save
b0b9891 verified
raw
history blame contribute delete
No virus
8.24 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-08,
"logits/chosen": -1.4567933082580566,
"logits/rejected": -0.871229887008667,
"logps/chosen": -244.365234375,
"logps/rejected": -212.26486206054688,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -1.83387291431427,
"logits/rejected": -1.0804697275161743,
"logps/chosen": -206.00912475585938,
"logps/rejected": -202.784912109375,
"loss": 0.6817,
"rewards/accuracies": 0.5486111044883728,
"rewards/chosen": -0.039022047072649,
"rewards/margins": 0.04178649187088013,
"rewards/rejected": -0.08080853521823883,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.989935734988097e-07,
"logits/chosen": -1.0675297975540161,
"logits/rejected": -0.5359733700752258,
"logps/chosen": -237.27444458007812,
"logps/rejected": -251.00753784179688,
"loss": 0.6561,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.7787758111953735,
"rewards/margins": 0.11565746366977692,
"rewards/rejected": -0.8944332003593445,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.877641290737883e-07,
"logits/chosen": -1.095473289489746,
"logits/rejected": -0.37094515562057495,
"logps/chosen": -244.32162475585938,
"logps/rejected": -296.1733703613281,
"loss": 0.5953,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.6706979870796204,
"rewards/margins": 0.5164287090301514,
"rewards/rejected": -1.187126636505127,
"step": 30
},
{
"epoch": 0.26,
"learning_rate": 4.646121984004665e-07,
"logits/chosen": -0.8634458780288696,
"logits/rejected": 0.12595783174037933,
"logps/chosen": -242.0459442138672,
"logps/rejected": -296.41595458984375,
"loss": 0.5648,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.5677449703216553,
"rewards/margins": 0.5976042747497559,
"rewards/rejected": -1.1653492450714111,
"step": 40
},
{
"epoch": 0.32,
"learning_rate": 4.3069871595684787e-07,
"logits/chosen": -0.6954927444458008,
"logits/rejected": 0.03154268115758896,
"logps/chosen": -246.68258666992188,
"logps/rejected": -295.62884521484375,
"loss": 0.5913,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.8166979551315308,
"rewards/margins": 0.5098680257797241,
"rewards/rejected": -1.3265659809112549,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.877242453630256e-07,
"logits/chosen": -0.768271267414093,
"logits/rejected": 0.022685179486870766,
"logps/chosen": -245.92782592773438,
"logps/rejected": -300.2510681152344,
"loss": 0.5887,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.7170382738113403,
"rewards/margins": 0.5133967399597168,
"rewards/rejected": -1.2304350137710571,
"step": 60
},
{
"epoch": 0.45,
"learning_rate": 3.378437060203357e-07,
"logits/chosen": -0.5168389081954956,
"logits/rejected": 0.45852264761924744,
"logps/chosen": -256.852294921875,
"logps/rejected": -309.4953308105469,
"loss": 0.5836,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.9044780731201172,
"rewards/margins": 0.5655065178871155,
"rewards/rejected": -1.4699846506118774,
"step": 70
},
{
"epoch": 0.51,
"learning_rate": 2.8355831645441387e-07,
"logits/chosen": -0.3654092848300934,
"logits/rejected": 0.10795004665851593,
"logps/chosen": -251.9696502685547,
"logps/rejected": -292.9334716796875,
"loss": 0.5522,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.7978931069374084,
"rewards/margins": 0.38232654333114624,
"rewards/rejected": -1.1802196502685547,
"step": 80
},
{
"epoch": 0.58,
"learning_rate": 2.2759017277414164e-07,
"logits/chosen": -0.8108726739883423,
"logits/rejected": 0.14660978317260742,
"logps/chosen": -273.36419677734375,
"logps/rejected": -320.58209228515625,
"loss": 0.5671,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.6586915254592896,
"rewards/margins": 0.6224299669265747,
"rewards/rejected": -1.2811213731765747,
"step": 90
},
{
"epoch": 0.64,
"learning_rate": 1.7274575140626315e-07,
"logits/chosen": -0.46979203820228577,
"logits/rejected": 0.5494852066040039,
"logps/chosen": -272.69427490234375,
"logps/rejected": -317.7990417480469,
"loss": 0.5547,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.9002830386161804,
"rewards/margins": 0.5319327116012573,
"rewards/rejected": -1.432215690612793,
"step": 100
},
{
"epoch": 0.7,
"learning_rate": 1.2177518064852348e-07,
"logits/chosen": -0.3219306170940399,
"logits/rejected": 0.26910799741744995,
"logps/chosen": -251.5453338623047,
"logps/rejected": -299.8834533691406,
"loss": 0.56,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.7972058057785034,
"rewards/margins": 0.43246564269065857,
"rewards/rejected": -1.2296714782714844,
"step": 110
},
{
"epoch": 0.77,
"learning_rate": 7.723433775328384e-08,
"logits/chosen": -0.37325382232666016,
"logits/rejected": 0.5774334669113159,
"logps/chosen": -233.79562377929688,
"logps/rejected": -328.5582580566406,
"loss": 0.5585,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.6402639150619507,
"rewards/margins": 0.7515830397605896,
"rewards/rejected": -1.3918468952178955,
"step": 120
},
{
"epoch": 0.83,
"learning_rate": 4.1356686569674335e-08,
"logits/chosen": -0.3119003176689148,
"logits/rejected": 0.8427650332450867,
"logps/chosen": -233.98971557617188,
"logps/rejected": -324.93316650390625,
"loss": 0.5265,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.7347938418388367,
"rewards/margins": 0.7224765419960022,
"rewards/rejected": -1.4572702646255493,
"step": 130
},
{
"epoch": 0.9,
"learning_rate": 1.5941282340065697e-08,
"logits/chosen": -0.20903070271015167,
"logits/rejected": 0.7928945422172546,
"logps/chosen": -274.28704833984375,
"logps/rejected": -331.6188049316406,
"loss": 0.5484,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.9145911931991577,
"rewards/margins": 0.5992218255996704,
"rewards/rejected": -1.5138130187988281,
"step": 140
},
{
"epoch": 0.96,
"learning_rate": 2.2625595580163247e-09,
"logits/chosen": 0.10685434192419052,
"logits/rejected": 0.766906201839447,
"logps/chosen": -257.482666015625,
"logps/rejected": -326.8499450683594,
"loss": 0.5539,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.9127100706100464,
"rewards/margins": 0.6432833075523376,
"rewards/rejected": -1.5559935569763184,
"step": 150
},
{
"epoch": 1.0,
"step": 156,
"total_flos": 0.0,
"train_loss": 0.5776262069359804,
"train_runtime": 9474.0276,
"train_samples_per_second": 2.111,
"train_steps_per_second": 0.016
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}