pythia-1.4b-dpo-full / trainer_state.json
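
The JSON below is the trainer_state.json checkpoint that the Hugging Face Trainer writes during a DPO fine-tuning run: the log_history array records training metrics every 10 steps and evaluation metrics every 100 steps, and the trailing fields hold the run-level settings. A minimal sketch of reading the run summary, assuming the file has been saved locally as trainer_state.json (the file name and path are illustrative):

# Minimal sketch: load the trainer state and print the run summary.
# Assumes the JSON shown below is saved locally as "trainer_state.json".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])   # 509
print("epoch:", state["epoch"])               # ~1.0

# The final log_history entry holds the aggregate training statistics.
summary = state["log_history"][-1]
print("train_loss:", summary["train_loss"])
print("train_runtime (s):", summary["train_runtime"])
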
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9990186457311089,
"eval_steps": 100,
"global_step": 509,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001962708537782139,
"grad_norm": 2.4117076017287205,
"learning_rate": 9.803921568627451e-09,
"logits/chosen": -1.125,
"logits/rejected": -1.1875,
"logps/chosen": -500.0,
"logps/chosen_bottom_tokens": -14.5,
"logps/chosen_top_tokens": -0.0005645751953125,
"logps/rejected": -520.0,
"logps/rejected_bottom_tokens": -13.9375,
"logps/rejected_top_tokens": -0.00054168701171875,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.019627085377821395,
"grad_norm": 2.3800058601187866,
"learning_rate": 9.80392156862745e-08,
"logits/chosen": -1.1640625,
"logits/rejected": -1.203125,
"logps/chosen": -380.0,
"logps/chosen_bottom_tokens": -14.125,
"logps/chosen_top_tokens": -0.000804901123046875,
"logps/rejected": -316.0,
"logps/rejected_bottom_tokens": -14.125,
"logps/rejected_top_tokens": -0.000827789306640625,
"loss": 0.6922,
"rewards/accuracies": 0.41111111640930176,
"rewards/chosen": -0.000202178955078125,
"rewards/margins": 0.0035247802734375,
"rewards/rejected": -0.00372314453125,
"step": 10
},
{
"epoch": 0.03925417075564279,
"grad_norm": 2.4064882227881057,
"learning_rate": 1.96078431372549e-07,
"logits/chosen": -1.0859375,
"logits/rejected": -1.1484375,
"logps/chosen": -374.0,
"logps/chosen_bottom_tokens": -14.125,
"logps/chosen_top_tokens": -0.000835418701171875,
"logps/rejected": -324.0,
"logps/rejected_bottom_tokens": -14.0625,
"logps/rejected_top_tokens": -0.00084686279296875,
"loss": 0.6913,
"rewards/accuracies": 0.4399999976158142,
"rewards/chosen": 0.003662109375,
"rewards/margins": 0.0033111572265625,
"rewards/rejected": 0.0003528594970703125,
"step": 20
},
{
"epoch": 0.058881256133464184,
"grad_norm": 2.3536995350535426,
"learning_rate": 2.941176470588235e-07,
"logits/chosen": -1.125,
"logits/rejected": -1.1796875,
"logps/chosen": -364.0,
"logps/chosen_bottom_tokens": -14.25,
"logps/chosen_top_tokens": -0.000762939453125,
"logps/rejected": -324.0,
"logps/rejected_bottom_tokens": -14.125,
"logps/rejected_top_tokens": -0.00078582763671875,
"loss": 0.6938,
"rewards/accuracies": 0.3850000202655792,
"rewards/chosen": -0.0030670166015625,
"rewards/margins": -0.0067138671875,
"rewards/rejected": 0.003631591796875,
"step": 30
},
{
"epoch": 0.07850834151128558,
"grad_norm": 2.3870217018270155,
"learning_rate": 3.92156862745098e-07,
"logits/chosen": -1.125,
"logits/rejected": -1.15625,
"logps/chosen": -378.0,
"logps/chosen_bottom_tokens": -14.1875,
"logps/chosen_top_tokens": -0.000759124755859375,
"logps/rejected": -338.0,
"logps/rejected_bottom_tokens": -14.125,
"logps/rejected_top_tokens": -0.000804901123046875,
"loss": 0.6929,
"rewards/accuracies": 0.445000022649765,
"rewards/chosen": 0.000881195068359375,
"rewards/margins": 0.00244140625,
"rewards/rejected": -0.00154876708984375,
"step": 40
},
{
"epoch": 0.09813542688910697,
"grad_norm": 2.4788478916800147,
"learning_rate": 4.901960784313725e-07,
"logits/chosen": -1.1171875,
"logits/rejected": -1.1484375,
"logps/chosen": -406.0,
"logps/chosen_bottom_tokens": -14.0625,
"logps/chosen_top_tokens": -0.000774383544921875,
"logps/rejected": -352.0,
"logps/rejected_bottom_tokens": -14.0,
"logps/rejected_top_tokens": -0.00080108642578125,
"loss": 0.6915,
"rewards/accuracies": 0.5400000214576721,
"rewards/chosen": 0.0026397705078125,
"rewards/margins": 0.005889892578125,
"rewards/rejected": -0.00323486328125,
"step": 50
},
{
"epoch": 0.11776251226692837,
"grad_norm": 2.360316334548125,
"learning_rate": 4.995237599803335e-07,
"logits/chosen": -1.140625,
"logits/rejected": -1.203125,
"logps/chosen": -406.0,
"logps/chosen_bottom_tokens": -14.1875,
"logps/chosen_top_tokens": -0.000782012939453125,
"logps/rejected": -322.0,
"logps/rejected_bottom_tokens": -14.0625,
"logps/rejected_top_tokens": -0.0008087158203125,
"loss": 0.6913,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": 0.00244140625,
"rewards/margins": 0.00201416015625,
"rewards/rejected": 0.000431060791015625,
"step": 60
},
{
"epoch": 0.13738959764474976,
"grad_norm": 2.3051434353276847,
"learning_rate": 4.978798275112142e-07,
"logits/chosen": -1.09375,
"logits/rejected": -1.1328125,
"logps/chosen": -372.0,
"logps/chosen_bottom_tokens": -14.0625,
"logps/chosen_top_tokens": -0.00078582763671875,
"logps/rejected": -330.0,
"logps/rejected_bottom_tokens": -14.0625,
"logps/rejected_top_tokens": -0.000789642333984375,
"loss": 0.688,
"rewards/accuracies": 0.5049999952316284,
"rewards/chosen": 0.00897216796875,
"rewards/margins": 0.01190185546875,
"rewards/rejected": -0.0028839111328125,
"step": 70
},
{
"epoch": 0.15701668302257116,
"grad_norm": 2.2866846976386,
"learning_rate": 4.950700530747689e-07,
"logits/chosen": -1.078125,
"logits/rejected": -1.1484375,
"logps/chosen": -378.0,
"logps/chosen_bottom_tokens": -14.0,
"logps/chosen_top_tokens": -0.000934600830078125,
"logps/rejected": -308.0,
"logps/rejected_bottom_tokens": -14.0,
"logps/rejected_top_tokens": -0.00087738037109375,
"loss": 0.685,
"rewards/accuracies": 0.5450000166893005,
"rewards/chosen": -0.00121307373046875,
"rewards/margins": 0.01483154296875,
"rewards/rejected": -0.01611328125,
"step": 80
},
{
"epoch": 0.17664376840039253,
"grad_norm": 2.3053347338418098,
"learning_rate": 4.911076517558622e-07,
"logits/chosen": -1.125,
"logits/rejected": -1.15625,
"logps/chosen": -382.0,
"logps/chosen_bottom_tokens": -14.125,
"logps/chosen_top_tokens": -0.000823974609375,
"logps/rejected": -346.0,
"logps/rejected_bottom_tokens": -14.0625,
"logps/rejected_top_tokens": -0.00084686279296875,
"loss": 0.6832,
"rewards/accuracies": 0.5600000023841858,
"rewards/chosen": -0.0106201171875,
"rewards/margins": 0.0159912109375,
"rewards/rejected": -0.026611328125,
"step": 90
},
{
"epoch": 0.19627085377821393,
"grad_norm": 2.2125416576513732,
"learning_rate": 4.860112597371772e-07,
"logits/chosen": -1.125,
"logits/rejected": -1.171875,
"logps/chosen": -372.0,
"logps/chosen_bottom_tokens": -14.125,
"logps/chosen_top_tokens": -0.000904083251953125,
"logps/rejected": -328.0,
"logps/rejected_bottom_tokens": -14.0,
"logps/rejected_top_tokens": -0.0009002685546875,
"loss": 0.678,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.017333984375,
"rewards/margins": 0.0361328125,
"rewards/rejected": -0.053466796875,
"step": 100
},
{
"epoch": 0.19627085377821393,
"eval_logits/chosen": -1.09375,
"eval_logits/rejected": -1.15625,
"eval_logps/chosen": -396.0,
"eval_logps/chosen_bottom_tokens": -14.0625,
"eval_logps/chosen_top_tokens": -0.0008697509765625,
"eval_logps/rejected": -344.0,
"eval_logps/rejected_bottom_tokens": -14.0,
"eval_logps/rejected_top_tokens": -0.0008697509765625,
"eval_loss": 0.6789160370826721,
"eval_rewards/accuracies": 0.5880597233772278,
"eval_rewards/chosen": -0.0274658203125,
"eval_rewards/margins": 0.033203125,
"eval_rewards/rejected": -0.060791015625,
"eval_runtime": 111.5869,
"eval_samples_per_second": 17.923,
"eval_steps_per_second": 0.6,
"step": 100
},
{
"epoch": 0.21589793915603533,
"grad_norm": 2.438395616681449,
"learning_rate": 4.798048466485017e-07,
"logits/chosen": -1.1015625,
"logits/rejected": -1.109375,
"logps/chosen": -344.0,
"logps/chosen_bottom_tokens": -14.0,
"logps/chosen_top_tokens": -0.000835418701171875,
"logps/rejected": -332.0,
"logps/rejected_bottom_tokens": -14.0,
"logps/rejected_top_tokens": -0.000873565673828125,
"loss": 0.6804,
"rewards/accuracies": 0.5899999737739563,
"rewards/chosen": -0.037109375,
"rewards/margins": 0.02001953125,
"rewards/rejected": -0.05712890625,
"step": 110
},
{
"epoch": 0.23552502453385674,
"grad_norm": 2.226213549318803,
"learning_rate": 4.725176028314541e-07,
"logits/chosen": -1.109375,
"logits/rejected": -1.1171875,
"logps/chosen": -372.0,
"logps/chosen_bottom_tokens": -14.0,
"logps/chosen_top_tokens": -0.0008544921875,
"logps/rejected": -354.0,
"logps/rejected_bottom_tokens": -14.0,
"logps/rejected_top_tokens": -0.0008544921875,
"loss": 0.6745,
"rewards/accuracies": 0.6399999856948853,
"rewards/chosen": -0.03564453125,
"rewards/margins": 0.0517578125,
"rewards/rejected": -0.08740234375,
"step": 120
},
{
"epoch": 0.25515210991167814,
"grad_norm": 2.4135162897156706,
"learning_rate": 4.641838020498713e-07,
"logits/chosen": -1.09375,
"logits/rejected": -1.1640625,
"logps/chosen": -408.0,
"logps/chosen_bottom_tokens": -14.1875,
"logps/chosen_top_tokens": -0.000934600830078125,
"logps/rejected": -338.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.00092315673828125,
"loss": 0.6674,
"rewards/accuracies": 0.5849999785423279,
"rewards/chosen": -0.0703125,
"rewards/margins": 0.0517578125,
"rewards/rejected": -0.1220703125,
"step": 130
},
{
"epoch": 0.2747791952894995,
"grad_norm": 2.4502181786024004,
"learning_rate": 4.5484264029156733e-07,
"logits/chosen": -1.1015625,
"logits/rejected": -1.1484375,
"logps/chosen": -386.0,
"logps/chosen_bottom_tokens": -14.125,
"logps/chosen_top_tokens": -0.000812530517578125,
"logps/rejected": -336.0,
"logps/rejected_bottom_tokens": -14.125,
"logps/rejected_top_tokens": -0.00083160400390625,
"loss": 0.6635,
"rewards/accuracies": 0.5900000333786011,
"rewards/chosen": -0.1015625,
"rewards/margins": 0.048828125,
"rewards/rejected": -0.150390625,
"step": 140
},
{
"epoch": 0.2944062806673209,
"grad_norm": 2.4663119079457614,
"learning_rate": 4.445380514196192e-07,
"logits/chosen": -1.09375,
"logits/rejected": -1.171875,
"logps/chosen": -428.0,
"logps/chosen_bottom_tokens": -14.1875,
"logps/chosen_top_tokens": -0.00087738037109375,
"logps/rejected": -356.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.000858306884765625,
"loss": 0.668,
"rewards/accuracies": 0.6350000500679016,
"rewards/chosen": -0.12890625,
"rewards/margins": 0.0673828125,
"rewards/rejected": -0.1962890625,
"step": 150
},
{
"epoch": 0.3140333660451423,
"grad_norm": 2.455591342132379,
"learning_rate": 4.33318500540218e-07,
"logits/chosen": -1.0859375,
"logits/rejected": -1.1328125,
"logps/chosen": -408.0,
"logps/chosen_bottom_tokens": -14.125,
"logps/chosen_top_tokens": -0.00089263916015625,
"logps/rejected": -368.0,
"logps/rejected_bottom_tokens": -14.0625,
"logps/rejected_top_tokens": -0.000904083251953125,
"loss": 0.6655,
"rewards/accuracies": 0.6050000190734863,
"rewards/chosen": -0.16796875,
"rewards/margins": 0.078125,
"rewards/rejected": -0.24609375,
"step": 160
},
{
"epoch": 0.3336604514229637,
"grad_norm": 2.436300399124971,
"learning_rate": 4.2123675605892985e-07,
"logits/chosen": -1.078125,
"logits/rejected": -1.1484375,
"logps/chosen": -422.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.00101470947265625,
"logps/rejected": -364.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.00098419189453125,
"loss": 0.6585,
"rewards/accuracies": 0.6450000405311584,
"rewards/chosen": -0.1826171875,
"rewards/margins": 0.115234375,
"rewards/rejected": -0.296875,
"step": 170
},
{
"epoch": 0.35328753680078506,
"grad_norm": 2.5546008416763035,
"learning_rate": 4.0834964149744333e-07,
"logits/chosen": -1.1015625,
"logits/rejected": -1.15625,
"logps/chosen": -416.0,
"logps/chosen_bottom_tokens": -14.25,
"logps/chosen_top_tokens": -0.00093841552734375,
"logps/rejected": -380.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.00099945068359375,
"loss": 0.6643,
"rewards/accuracies": 0.6100000143051147,
"rewards/chosen": -0.232421875,
"rewards/margins": 0.06396484375,
"rewards/rejected": -0.296875,
"step": 180
},
{
"epoch": 0.3729146221786065,
"grad_norm": 2.555290762655567,
"learning_rate": 3.947177682380738e-07,
"logits/chosen": -1.1796875,
"logits/rejected": -1.203125,
"logps/chosen": -378.0,
"logps/chosen_bottom_tokens": -14.25,
"logps/chosen_top_tokens": -0.000789642333984375,
"logps/rejected": -356.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.000823974609375,
"loss": 0.6499,
"rewards/accuracies": 0.6450001001358032,
"rewards/chosen": -0.2412109375,
"rewards/margins": 0.11669921875,
"rewards/rejected": -0.357421875,
"step": 190
},
{
"epoch": 0.39254170755642787,
"grad_norm": 2.7737043586573313,
"learning_rate": 3.804052504529933e-07,
"logits/chosen": -1.1796875,
"logits/rejected": -1.2265625,
"logps/chosen": -392.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.0008392333984375,
"logps/rejected": -370.0,
"logps/rejected_bottom_tokens": -14.375,
"logps/rejected_top_tokens": -0.00087738037109375,
"loss": 0.645,
"rewards/accuracies": 0.6350000500679016,
"rewards/chosen": -0.271484375,
"rewards/margins": 0.1396484375,
"rewards/rejected": -0.41015625,
"step": 200
},
{
"epoch": 0.39254170755642787,
"eval_logits/chosen": -1.15625,
"eval_logits/rejected": -1.203125,
"eval_logps/chosen": -422.0,
"eval_logps/chosen_bottom_tokens": -14.375,
"eval_logps/chosen_top_tokens": -0.000911712646484375,
"eval_logps/rejected": -380.0,
"eval_logps/rejected_bottom_tokens": -14.3125,
"eval_logps/rejected_top_tokens": -0.000919342041015625,
"eval_loss": 0.6488671898841858,
"eval_rewards/accuracies": 0.6447761058807373,
"eval_rewards/chosen": -0.287109375,
"eval_rewards/margins": 0.13671875,
"eval_rewards/rejected": -0.423828125,
"eval_runtime": 111.5112,
"eval_samples_per_second": 17.935,
"eval_steps_per_second": 0.601,
"step": 200
},
{
"epoch": 0.41216879293424924,
"grad_norm": 2.8286672144445277,
"learning_rate": 3.654794035589483e-07,
"logits/chosen": -1.1328125,
"logits/rejected": -1.1640625,
"logps/chosen": -362.0,
"logps/chosen_bottom_tokens": -14.25,
"logps/chosen_top_tokens": -0.00091552734375,
"logps/rejected": -344.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.000926971435546875,
"loss": 0.6512,
"rewards/accuracies": 0.6149999499320984,
"rewards/chosen": -0.298828125,
"rewards/margins": 0.12060546875,
"rewards/rejected": -0.419921875,
"step": 210
},
{
"epoch": 0.43179587831207067,
"grad_norm": 2.98579141751378,
"learning_rate": 3.5001042761570826e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.2109375,
"logps/chosen": -414.0,
"logps/chosen_bottom_tokens": -14.5,
"logps/chosen_top_tokens": -0.000762939453125,
"logps/rejected": -398.0,
"logps/rejected_bottom_tokens": -14.375,
"logps/rejected_top_tokens": -0.000743865966796875,
"loss": 0.6507,
"rewards/accuracies": 0.5800000429153442,
"rewards/chosen": -0.333984375,
"rewards/margins": 0.11279296875,
"rewards/rejected": -0.447265625,
"step": 220
},
{
"epoch": 0.45142296368989204,
"grad_norm": 2.849801650804548,
"learning_rate": 3.34071077157304e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.2265625,
"logps/chosen": -388.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.00075531005859375,
"logps/rejected": -354.0,
"logps/rejected_bottom_tokens": -14.3125,
"logps/rejected_top_tokens": -0.000827789306640625,
"loss": 0.6464,
"rewards/accuracies": 0.6299999952316284,
"rewards/chosen": -0.353515625,
"rewards/margins": 0.1337890625,
"rewards/rejected": -0.48828125,
"step": 230
},
{
"epoch": 0.47105004906771347,
"grad_norm": 3.020709895469043,
"learning_rate": 3.1773631900892204e-07,
"logits/chosen": -1.1484375,
"logits/rejected": -1.1875,
"logps/chosen": -416.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.000759124755859375,
"logps/rejected": -396.0,
"logps/rejected_bottom_tokens": -14.375,
"logps/rejected_top_tokens": -0.0007781982421875,
"loss": 0.6442,
"rewards/accuracies": 0.6200000047683716,
"rewards/chosen": -0.38671875,
"rewards/margins": 0.1337890625,
"rewards/rejected": -0.51953125,
"step": 240
},
{
"epoch": 0.49067713444553485,
"grad_norm": 2.801068325901482,
"learning_rate": 3.0108297969883103e-07,
"logits/chosen": -1.1640625,
"logits/rejected": -1.1953125,
"logps/chosen": -426.0,
"logps/chosen_bottom_tokens": -14.4375,
"logps/chosen_top_tokens": -0.0008392333984375,
"logps/rejected": -398.0,
"logps/rejected_bottom_tokens": -14.375,
"logps/rejected_top_tokens": -0.00081634521484375,
"loss": 0.6347,
"rewards/accuracies": 0.64000004529953,
"rewards/chosen": -0.39453125,
"rewards/margins": 0.1865234375,
"rewards/rejected": -0.58203125,
"step": 250
},
{
"epoch": 0.5103042198233563,
"grad_norm": 2.8119914001202835,
"learning_rate": 2.8418938412365013e-07,
"logits/chosen": -1.1640625,
"logits/rejected": -1.203125,
"logps/chosen": -396.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.000865936279296875,
"logps/rejected": -372.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.000885009765625,
"loss": 0.6381,
"rewards/accuracies": 0.6350000500679016,
"rewards/chosen": -0.373046875,
"rewards/margins": 0.171875,
"rewards/rejected": -0.546875,
"step": 260
},
{
"epoch": 0.5299313052011776,
"grad_norm": 2.914608701481186,
"learning_rate": 2.671349871664101e-07,
"logits/chosen": -1.1640625,
"logits/rejected": -1.171875,
"logps/chosen": -398.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.000782012939453125,
"logps/rejected": -386.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.00077056884765625,
"loss": 0.6315,
"rewards/accuracies": 0.64000004529953,
"rewards/chosen": -0.40625,
"rewards/margins": 0.17578125,
"rewards/rejected": -0.58203125,
"step": 270
},
{
"epoch": 0.549558390578999,
"grad_norm": 2.974677635397429,
"learning_rate": 2.5e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.1953125,
"logps/chosen": -438.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.000911712646484375,
"logps/rejected": -402.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.0009002685546875,
"loss": 0.6384,
"rewards/accuracies": 0.6600000262260437,
"rewards/chosen": -0.42578125,
"rewards/margins": 0.21484375,
"rewards/rejected": -0.640625,
"step": 280
},
{
"epoch": 0.5691854759568205,
"grad_norm": 3.4767790428686234,
"learning_rate": 2.3286501283358982e-07,
"logits/chosen": -1.1796875,
"logits/rejected": -1.2421875,
"logps/chosen": -412.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.0008392333984375,
"logps/rejected": -376.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.000885009765625,
"loss": 0.632,
"rewards/accuracies": 0.5750000476837158,
"rewards/chosen": -0.4609375,
"rewards/margins": 0.16015625,
"rewards/rejected": -0.62109375,
"step": 290
},
{
"epoch": 0.5888125613346418,
"grad_norm": 3.0983859451271565,
"learning_rate": 2.1581061587634987e-07,
"logits/chosen": -1.203125,
"logits/rejected": -1.2421875,
"logps/chosen": -428.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.00075531005859375,
"logps/rejected": -388.0,
"logps/rejected_bottom_tokens": -14.3125,
"logps/rejected_top_tokens": -0.00079345703125,
"loss": 0.6396,
"rewards/accuracies": 0.5999999642372131,
"rewards/chosen": -0.482421875,
"rewards/margins": 0.162109375,
"rewards/rejected": -0.64453125,
"step": 300
},
{
"epoch": 0.5888125613346418,
"eval_logits/chosen": -1.1875,
"eval_logits/rejected": -1.234375,
"eval_logps/chosen": -438.0,
"eval_logps/chosen_bottom_tokens": -14.375,
"eval_logps/chosen_top_tokens": -0.0007476806640625,
"eval_logps/rejected": -406.0,
"eval_logps/rejected_bottom_tokens": -14.3125,
"eval_logps/rejected_top_tokens": -0.000759124755859375,
"eval_loss": 0.6303857564926147,
"eval_rewards/accuracies": 0.6626865863800049,
"eval_rewards/chosen": -0.451171875,
"eval_rewards/margins": 0.2275390625,
"eval_rewards/rejected": -0.6796875,
"eval_runtime": 111.5027,
"eval_samples_per_second": 17.937,
"eval_steps_per_second": 0.601,
"step": 300
},
{
"epoch": 0.6084396467124632,
"grad_norm": 3.1412458629194835,
"learning_rate": 1.9891702030116897e-07,
"logits/chosen": -1.140625,
"logits/rejected": -1.2421875,
"logps/chosen": -446.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.00074005126953125,
"logps/rejected": -358.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.0007171630859375,
"loss": 0.6234,
"rewards/accuracies": 0.6949999928474426,
"rewards/chosen": -0.408203125,
"rewards/margins": 0.2451171875,
"rewards/rejected": -0.65625,
"step": 310
},
{
"epoch": 0.6280667320902846,
"grad_norm": 3.1923082526436986,
"learning_rate": 1.8226368099107792e-07,
"logits/chosen": -1.1640625,
"logits/rejected": -1.2109375,
"logps/chosen": -424.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.00080108642578125,
"logps/rejected": -364.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.000843048095703125,
"loss": 0.6241,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.42578125,
"rewards/margins": 0.2216796875,
"rewards/rejected": -0.6484375,
"step": 320
},
{
"epoch": 0.647693817468106,
"grad_norm": 3.064211696764281,
"learning_rate": 1.6592892284269594e-07,
"logits/chosen": -1.1796875,
"logits/rejected": -1.2109375,
"logps/chosen": -408.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.0007171630859375,
"logps/rejected": -386.0,
"logps/rejected_bottom_tokens": -14.3125,
"logps/rejected_top_tokens": -0.000732421875,
"loss": 0.6224,
"rewards/accuracies": 0.6799999475479126,
"rewards/chosen": -0.431640625,
"rewards/margins": 0.259765625,
"rewards/rejected": -0.69140625,
"step": 330
},
{
"epoch": 0.6673209028459274,
"grad_norm": 3.1791023826814353,
"learning_rate": 1.4998957238429172e-07,
"logits/chosen": -1.21875,
"logits/rejected": -1.2421875,
"logps/chosen": -408.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.000804901123046875,
"logps/rejected": -380.0,
"logps/rejected_bottom_tokens": -14.3125,
"logps/rejected_top_tokens": -0.00077056884765625,
"loss": 0.6204,
"rewards/accuracies": 0.6300000548362732,
"rewards/chosen": -0.5078125,
"rewards/margins": 0.244140625,
"rewards/rejected": -0.75390625,
"step": 340
},
{
"epoch": 0.6869479882237488,
"grad_norm": 3.295570474728778,
"learning_rate": 1.345205964410517e-07,
"logits/chosen": -1.1953125,
"logits/rejected": -1.21875,
"logps/chosen": -392.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.000858306884765625,
"logps/rejected": -372.0,
"logps/rejected_bottom_tokens": -14.3125,
"logps/rejected_top_tokens": -0.00089263916015625,
"loss": 0.627,
"rewards/accuracies": 0.5850000381469727,
"rewards/chosen": -0.49609375,
"rewards/margins": 0.2236328125,
"rewards/rejected": -0.71875,
"step": 350
},
{
"epoch": 0.7065750736015701,
"grad_norm": 3.5211819482445184,
"learning_rate": 1.1959474954700665e-07,
"logits/chosen": -1.1796875,
"logits/rejected": -1.21875,
"logps/chosen": -424.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.00067138671875,
"logps/rejected": -416.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.000675201416015625,
"loss": 0.613,
"rewards/accuracies": 0.7049999833106995,
"rewards/chosen": -0.44140625,
"rewards/margins": 0.234375,
"rewards/rejected": -0.67578125,
"step": 360
},
{
"epoch": 0.7262021589793916,
"grad_norm": 3.3333877037469026,
"learning_rate": 1.0528223176192615e-07,
"logits/chosen": -1.1953125,
"logits/rejected": -1.234375,
"logps/chosen": -442.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.00069427490234375,
"logps/rejected": -398.0,
"logps/rejected_bottom_tokens": -14.375,
"logps/rejected_top_tokens": -0.00067901611328125,
"loss": 0.6218,
"rewards/accuracies": 0.6250000596046448,
"rewards/chosen": -0.5234375,
"rewards/margins": 0.2138671875,
"rewards/rejected": -0.73828125,
"step": 370
},
{
"epoch": 0.745829244357213,
"grad_norm": 3.3039144354882657,
"learning_rate": 9.16503585025567e-08,
"logits/chosen": -1.1953125,
"logits/rejected": -1.21875,
"logps/chosen": -420.0,
"logps/chosen_bottom_tokens": -14.25,
"logps/chosen_top_tokens": -0.00090789794921875,
"logps/rejected": -412.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.00104522705078125,
"loss": 0.6279,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.5,
"rewards/margins": 0.2041015625,
"rewards/rejected": -0.703125,
"step": 380
},
{
"epoch": 0.7654563297350343,
"grad_norm": 3.460907844274303,
"learning_rate": 7.876324394107017e-08,
"logits/chosen": -1.15625,
"logits/rejected": -1.203125,
"logps/chosen": -442.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.00067901611328125,
"logps/rejected": -418.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.00069427490234375,
"loss": 0.6289,
"rewards/accuracies": 0.6350000500679016,
"rewards/chosen": -0.50390625,
"rewards/margins": 0.2255859375,
"rewards/rejected": -0.7265625,
"step": 390
},
{
"epoch": 0.7850834151128557,
"grad_norm": 3.2842912290921897,
"learning_rate": 6.668149945978201e-08,
"logits/chosen": -1.1953125,
"logits/rejected": -1.2265625,
"logps/chosen": -440.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.00070953369140625,
"logps/rejected": -420.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.000732421875,
"loss": 0.6102,
"rewards/accuracies": 0.6700000166893005,
"rewards/chosen": -0.482421875,
"rewards/margins": 0.291015625,
"rewards/rejected": -0.7734375,
"step": 400
},
{
"epoch": 0.7850834151128557,
"eval_logits/chosen": -1.1875,
"eval_logits/rejected": -1.234375,
"eval_logps/chosen": -444.0,
"eval_logps/chosen_bottom_tokens": -14.3125,
"eval_logps/chosen_top_tokens": -0.00067138671875,
"eval_logps/rejected": -414.0,
"eval_logps/rejected_bottom_tokens": -14.25,
"eval_logps/rejected_top_tokens": -0.00066375732421875,
"eval_loss": 0.6267920136451721,
"eval_rewards/accuracies": 0.6567164063453674,
"eval_rewards/chosen": -0.50390625,
"eval_rewards/margins": 0.2578125,
"eval_rewards/rejected": -0.76171875,
"eval_runtime": 111.5791,
"eval_samples_per_second": 17.925,
"eval_steps_per_second": 0.6,
"step": 400
},
{
"epoch": 0.8047105004906772,
"grad_norm": 3.3007954730404303,
"learning_rate": 5.546194858038072e-08,
"logits/chosen": -1.171875,
"logits/rejected": -1.21875,
"logps/chosen": -416.0,
"logps/chosen_bottom_tokens": -14.3125,
"logps/chosen_top_tokens": -0.000766754150390625,
"logps/rejected": -374.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.000827789306640625,
"loss": 0.6227,
"rewards/accuracies": 0.6699999570846558,
"rewards/chosen": -0.51171875,
"rewards/margins": 0.28515625,
"rewards/rejected": -0.796875,
"step": 410
},
{
"epoch": 0.8243375858684985,
"grad_norm": 3.9743687860867185,
"learning_rate": 4.5157359708432626e-08,
"logits/chosen": -1.1953125,
"logits/rejected": -1.2265625,
"logps/chosen": -394.0,
"logps/chosen_bottom_tokens": -14.25,
"logps/chosen_top_tokens": -0.00075531005859375,
"logps/rejected": -412.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.0007476806640625,
"loss": 0.6205,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.474609375,
"rewards/margins": 0.234375,
"rewards/rejected": -0.70703125,
"step": 420
},
{
"epoch": 0.8439646712463199,
"grad_norm": 3.1969688623984633,
"learning_rate": 3.581619795012874e-08,
"logits/chosen": -1.1796875,
"logits/rejected": -1.1875,
"logps/chosen": -400.0,
"logps/chosen_bottom_tokens": -14.1875,
"logps/chosen_top_tokens": -0.000782012939453125,
"logps/rejected": -404.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.000743865966796875,
"loss": 0.6208,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.474609375,
"rewards/margins": 0.267578125,
"rewards/rejected": -0.7421875,
"step": 430
},
{
"epoch": 0.8635917566241413,
"grad_norm": 3.705663203159775,
"learning_rate": 2.748239716854589e-08,
"logits/chosen": -1.2109375,
"logits/rejected": -1.1953125,
"logps/chosen": -424.0,
"logps/chosen_bottom_tokens": -14.1875,
"logps/chosen_top_tokens": -0.000728607177734375,
"logps/rejected": -420.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.000751495361328125,
"loss": 0.6398,
"rewards/accuracies": 0.5849999785423279,
"rewards/chosen": -0.51171875,
"rewards/margins": 0.1435546875,
"rewards/rejected": -0.65625,
"step": 440
},
{
"epoch": 0.8832188420019627,
"grad_norm": 3.9792023056235455,
"learning_rate": 2.0195153351498323e-08,
"logits/chosen": -1.1796875,
"logits/rejected": -1.2109375,
"logps/chosen": -432.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.000732421875,
"logps/rejected": -420.0,
"logps/rejected_bottom_tokens": -14.3125,
"logps/rejected_top_tokens": -0.000705718994140625,
"loss": 0.611,
"rewards/accuracies": 0.6149999499320984,
"rewards/chosen": -0.53125,
"rewards/margins": 0.2421875,
"rewards/rejected": -0.7734375,
"step": 450
},
{
"epoch": 0.9028459273797841,
"grad_norm": 3.598443005581659,
"learning_rate": 1.3988740262822846e-08,
"logits/chosen": -1.1953125,
"logits/rejected": -1.203125,
"logps/chosen": -428.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.00067138671875,
"logps/rejected": -410.0,
"logps/rejected_bottom_tokens": -14.3125,
"logps/rejected_top_tokens": -0.000667572021484375,
"loss": 0.6138,
"rewards/accuracies": 0.6349999904632568,
"rewards/chosen": -0.490234375,
"rewards/margins": 0.216796875,
"rewards/rejected": -0.70703125,
"step": 460
},
{
"epoch": 0.9224730127576055,
"grad_norm": 3.423571391469107,
"learning_rate": 8.892348244137788e-09,
"logits/chosen": -1.1875,
"logits/rejected": -1.2421875,
"logps/chosen": -474.0,
"logps/chosen_bottom_tokens": -14.5625,
"logps/chosen_top_tokens": -0.000675201416015625,
"logps/rejected": -444.0,
"logps/rejected_bottom_tokens": -14.375,
"logps/rejected_top_tokens": -0.000652313232421875,
"loss": 0.6106,
"rewards/accuracies": 0.6299999952316284,
"rewards/chosen": -0.494140625,
"rewards/margins": 0.271484375,
"rewards/rejected": -0.765625,
"step": 470
},
{
"epoch": 0.9421000981354269,
"grad_norm": 3.1667123948106584,
"learning_rate": 4.929946925231076e-09,
"logits/chosen": -1.1328125,
"logits/rejected": -1.171875,
"logps/chosen": -410.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.0007476806640625,
"logps/rejected": -412.0,
"logps/rejected_bottom_tokens": -14.25,
"logps/rejected_top_tokens": -0.00095367431640625,
"loss": 0.6203,
"rewards/accuracies": 0.6049999594688416,
"rewards/chosen": -0.48046875,
"rewards/margins": 0.1953125,
"rewards/rejected": -0.67578125,
"step": 480
},
{
"epoch": 0.9617271835132483,
"grad_norm": 3.5902417143779024,
"learning_rate": 2.1201724887858484e-09,
"logits/chosen": -1.1640625,
"logits/rejected": -1.171875,
"logps/chosen": -422.0,
"logps/chosen_bottom_tokens": -14.25,
"logps/chosen_top_tokens": -0.00072479248046875,
"logps/rejected": -412.0,
"logps/rejected_bottom_tokens": -14.1875,
"logps/rejected_top_tokens": -0.000743865966796875,
"loss": 0.6235,
"rewards/accuracies": 0.5949999690055847,
"rewards/chosen": -0.5390625,
"rewards/margins": 0.2265625,
"rewards/rejected": -0.765625,
"step": 490
},
{
"epoch": 0.9813542688910697,
"grad_norm": 3.3154898943344704,
"learning_rate": 4.762400196664518e-10,
"logits/chosen": -1.1484375,
"logits/rejected": -1.1953125,
"logps/chosen": -428.0,
"logps/chosen_bottom_tokens": -14.375,
"logps/chosen_top_tokens": -0.000621795654296875,
"logps/rejected": -388.0,
"logps/rejected_bottom_tokens": -14.3125,
"logps/rejected_top_tokens": -0.0006256103515625,
"loss": 0.6084,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.578125,
"rewards/margins": 0.1787109375,
"rewards/rejected": -0.75390625,
"step": 500
},
{
"epoch": 0.9813542688910697,
"eval_logits/chosen": -1.1953125,
"eval_logits/rejected": -1.2421875,
"eval_logps/chosen": -446.0,
"eval_logps/chosen_bottom_tokens": -14.375,
"eval_logps/chosen_top_tokens": -0.000743865966796875,
"eval_logps/rejected": -416.0,
"eval_logps/rejected_bottom_tokens": -14.3125,
"eval_logps/rejected_top_tokens": -0.0007476806640625,
"eval_loss": 0.6259472370147705,
"eval_rewards/accuracies": 0.6567164659500122,
"eval_rewards/chosen": -0.5234375,
"eval_rewards/margins": 0.26171875,
"eval_rewards/rejected": -0.78515625,
"eval_runtime": 111.4505,
"eval_samples_per_second": 17.945,
"eval_steps_per_second": 0.601,
"step": 500
},
{
"epoch": 0.9990186457311089,
"step": 509,
"total_flos": 0.0,
"train_loss": 0.6464882252961105,
"train_runtime": 8284.9703,
"train_samples_per_second": 7.379,
"train_steps_per_second": 0.061
}
],
"logging_steps": 10,
"max_steps": 509,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}
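
The evaluation entries in log_history (the records carrying an eval_loss key) make it easy to track how the DPO objective progressed over the run. A sketch of extracting and plotting them, assuming matplotlib is installed and the file is saved locally as trainer_state.json:

# Sketch: plot evaluation loss and reward margin against training step.
# Assumes matplotlib is available and trainer_state.json is in the working directory.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Evaluation records are the log_history entries that carry an "eval_loss" key.
evals = [e for e in state["log_history"] if "eval_loss" in e]
steps = [e["step"] for e in evals]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(steps, [e["eval_loss"] for e in evals], marker="o")
ax1.set_xlabel("step")
ax1.set_ylabel("eval_loss")

ax2.plot(steps, [e["eval_rewards/margins"] for e in evals], marker="o")
ax2.set_xlabel("step")
ax2.set_ylabel("eval_rewards/margins")

fig.tight_layout()
fig.savefig("dpo_eval_curves.png")

In this run the eval loss falls from about 0.679 at step 100 to about 0.626 at step 500, while the eval reward margin grows from roughly 0.03 to 0.26, i.e. the model increasingly separates chosen from rejected completions as DPO training proceeds.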