|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9990186457311089, |
|
"eval_steps": 100, |
|
"global_step": 509, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001962708537782139, |
|
"grad_norm": 2.4117076017287205, |
|
"learning_rate": 9.803921568627451e-09, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -500.0, |
|
"logps/chosen_bottom_tokens": -14.5, |
|
"logps/chosen_top_tokens": -0.0005645751953125, |
|
"logps/rejected": -520.0, |
|
"logps/rejected_bottom_tokens": -13.9375, |
|
"logps/rejected_top_tokens": -0.00054168701171875, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.019627085377821395, |
|
"grad_norm": 2.3800058601187866, |
|
"learning_rate": 9.80392156862745e-08, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -380.0, |
|
"logps/chosen_bottom_tokens": -14.125, |
|
"logps/chosen_top_tokens": -0.000804901123046875, |
|
"logps/rejected": -316.0, |
|
"logps/rejected_bottom_tokens": -14.125, |
|
"logps/rejected_top_tokens": -0.000827789306640625, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.41111111640930176, |
|
"rewards/chosen": -0.000202178955078125, |
|
"rewards/margins": 0.0035247802734375, |
|
"rewards/rejected": -0.00372314453125, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03925417075564279, |
|
"grad_norm": 2.4064882227881057, |
|
"learning_rate": 1.96078431372549e-07, |
|
"logits/chosen": -1.0859375, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -374.0, |
|
"logps/chosen_bottom_tokens": -14.125, |
|
"logps/chosen_top_tokens": -0.000835418701171875, |
|
"logps/rejected": -324.0, |
|
"logps/rejected_bottom_tokens": -14.0625, |
|
"logps/rejected_top_tokens": -0.00084686279296875, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.4399999976158142, |
|
"rewards/chosen": 0.003662109375, |
|
"rewards/margins": 0.0033111572265625, |
|
"rewards/rejected": 0.0003528594970703125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.058881256133464184, |
|
"grad_norm": 2.3536995350535426, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.1796875, |
|
"logps/chosen": -364.0, |
|
"logps/chosen_bottom_tokens": -14.25, |
|
"logps/chosen_top_tokens": -0.000762939453125, |
|
"logps/rejected": -324.0, |
|
"logps/rejected_bottom_tokens": -14.125, |
|
"logps/rejected_top_tokens": -0.00078582763671875, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.3850000202655792, |
|
"rewards/chosen": -0.0030670166015625, |
|
"rewards/margins": -0.0067138671875, |
|
"rewards/rejected": 0.003631591796875, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07850834151128558, |
|
"grad_norm": 2.3870217018270155, |
|
"learning_rate": 3.92156862745098e-07, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.15625, |
|
"logps/chosen": -378.0, |
|
"logps/chosen_bottom_tokens": -14.1875, |
|
"logps/chosen_top_tokens": -0.000759124755859375, |
|
"logps/rejected": -338.0, |
|
"logps/rejected_bottom_tokens": -14.125, |
|
"logps/rejected_top_tokens": -0.000804901123046875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.445000022649765, |
|
"rewards/chosen": 0.000881195068359375, |
|
"rewards/margins": 0.00244140625, |
|
"rewards/rejected": -0.00154876708984375, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09813542688910697, |
|
"grad_norm": 2.4788478916800147, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": -1.1171875, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -406.0, |
|
"logps/chosen_bottom_tokens": -14.0625, |
|
"logps/chosen_top_tokens": -0.000774383544921875, |
|
"logps/rejected": -352.0, |
|
"logps/rejected_bottom_tokens": -14.0, |
|
"logps/rejected_top_tokens": -0.00080108642578125, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.0026397705078125, |
|
"rewards/margins": 0.005889892578125, |
|
"rewards/rejected": -0.00323486328125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11776251226692837, |
|
"grad_norm": 2.360316334548125, |
|
"learning_rate": 4.995237599803335e-07, |
|
"logits/chosen": -1.140625, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -406.0, |
|
"logps/chosen_bottom_tokens": -14.1875, |
|
"logps/chosen_top_tokens": -0.000782012939453125, |
|
"logps/rejected": -322.0, |
|
"logps/rejected_bottom_tokens": -14.0625, |
|
"logps/rejected_top_tokens": -0.0008087158203125, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.00244140625, |
|
"rewards/margins": 0.00201416015625, |
|
"rewards/rejected": 0.000431060791015625, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13738959764474976, |
|
"grad_norm": 2.3051434353276847, |
|
"learning_rate": 4.978798275112142e-07, |
|
"logits/chosen": -1.09375, |
|
"logits/rejected": -1.1328125, |
|
"logps/chosen": -372.0, |
|
"logps/chosen_bottom_tokens": -14.0625, |
|
"logps/chosen_top_tokens": -0.00078582763671875, |
|
"logps/rejected": -330.0, |
|
"logps/rejected_bottom_tokens": -14.0625, |
|
"logps/rejected_top_tokens": -0.000789642333984375, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5049999952316284, |
|
"rewards/chosen": 0.00897216796875, |
|
"rewards/margins": 0.01190185546875, |
|
"rewards/rejected": -0.0028839111328125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15701668302257116, |
|
"grad_norm": 2.2866846976386, |
|
"learning_rate": 4.950700530747689e-07, |
|
"logits/chosen": -1.078125, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -378.0, |
|
"logps/chosen_bottom_tokens": -14.0, |
|
"logps/chosen_top_tokens": -0.000934600830078125, |
|
"logps/rejected": -308.0, |
|
"logps/rejected_bottom_tokens": -14.0, |
|
"logps/rejected_top_tokens": -0.00087738037109375, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.5450000166893005, |
|
"rewards/chosen": -0.00121307373046875, |
|
"rewards/margins": 0.01483154296875, |
|
"rewards/rejected": -0.01611328125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17664376840039253, |
|
"grad_norm": 2.3053347338418098, |
|
"learning_rate": 4.911076517558622e-07, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.15625, |
|
"logps/chosen": -382.0, |
|
"logps/chosen_bottom_tokens": -14.125, |
|
"logps/chosen_top_tokens": -0.000823974609375, |
|
"logps/rejected": -346.0, |
|
"logps/rejected_bottom_tokens": -14.0625, |
|
"logps/rejected_top_tokens": -0.00084686279296875, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -0.0106201171875, |
|
"rewards/margins": 0.0159912109375, |
|
"rewards/rejected": -0.026611328125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19627085377821393, |
|
"grad_norm": 2.2125416576513732, |
|
"learning_rate": 4.860112597371772e-07, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.171875, |
|
"logps/chosen": -372.0, |
|
"logps/chosen_bottom_tokens": -14.125, |
|
"logps/chosen_top_tokens": -0.000904083251953125, |
|
"logps/rejected": -328.0, |
|
"logps/rejected_bottom_tokens": -14.0, |
|
"logps/rejected_top_tokens": -0.0009002685546875, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.017333984375, |
|
"rewards/margins": 0.0361328125, |
|
"rewards/rejected": -0.053466796875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19627085377821393, |
|
"eval_logits/chosen": -1.09375, |
|
"eval_logits/rejected": -1.15625, |
|
"eval_logps/chosen": -396.0, |
|
"eval_logps/chosen_bottom_tokens": -14.0625, |
|
"eval_logps/chosen_top_tokens": -0.0008697509765625, |
|
"eval_logps/rejected": -344.0, |
|
"eval_logps/rejected_bottom_tokens": -14.0, |
|
"eval_logps/rejected_top_tokens": -0.0008697509765625, |
|
"eval_loss": 0.6789160370826721, |
|
"eval_rewards/accuracies": 0.5880597233772278, |
|
"eval_rewards/chosen": -0.0274658203125, |
|
"eval_rewards/margins": 0.033203125, |
|
"eval_rewards/rejected": -0.060791015625, |
|
"eval_runtime": 111.5869, |
|
"eval_samples_per_second": 17.923, |
|
"eval_steps_per_second": 0.6, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21589793915603533, |
|
"grad_norm": 2.438395616681449, |
|
"learning_rate": 4.798048466485017e-07, |
|
"logits/chosen": -1.1015625, |
|
"logits/rejected": -1.109375, |
|
"logps/chosen": -344.0, |
|
"logps/chosen_bottom_tokens": -14.0, |
|
"logps/chosen_top_tokens": -0.000835418701171875, |
|
"logps/rejected": -332.0, |
|
"logps/rejected_bottom_tokens": -14.0, |
|
"logps/rejected_top_tokens": -0.000873565673828125, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": -0.037109375, |
|
"rewards/margins": 0.02001953125, |
|
"rewards/rejected": -0.05712890625, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.23552502453385674, |
|
"grad_norm": 2.226213549318803, |
|
"learning_rate": 4.725176028314541e-07, |
|
"logits/chosen": -1.109375, |
|
"logits/rejected": -1.1171875, |
|
"logps/chosen": -372.0, |
|
"logps/chosen_bottom_tokens": -14.0, |
|
"logps/chosen_top_tokens": -0.0008544921875, |
|
"logps/rejected": -354.0, |
|
"logps/rejected_bottom_tokens": -14.0, |
|
"logps/rejected_top_tokens": -0.0008544921875, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -0.03564453125, |
|
"rewards/margins": 0.0517578125, |
|
"rewards/rejected": -0.08740234375, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.25515210991167814, |
|
"grad_norm": 2.4135162897156706, |
|
"learning_rate": 4.641838020498713e-07, |
|
"logits/chosen": -1.09375, |
|
"logits/rejected": -1.1640625, |
|
"logps/chosen": -408.0, |
|
"logps/chosen_bottom_tokens": -14.1875, |
|
"logps/chosen_top_tokens": -0.000934600830078125, |
|
"logps/rejected": -338.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.00092315673828125, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.5849999785423279, |
|
"rewards/chosen": -0.0703125, |
|
"rewards/margins": 0.0517578125, |
|
"rewards/rejected": -0.1220703125, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2747791952894995, |
|
"grad_norm": 2.4502181786024004, |
|
"learning_rate": 4.5484264029156733e-07, |
|
"logits/chosen": -1.1015625, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -386.0, |
|
"logps/chosen_bottom_tokens": -14.125, |
|
"logps/chosen_top_tokens": -0.000812530517578125, |
|
"logps/rejected": -336.0, |
|
"logps/rejected_bottom_tokens": -14.125, |
|
"logps/rejected_top_tokens": -0.00083160400390625, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.5900000333786011, |
|
"rewards/chosen": -0.1015625, |
|
"rewards/margins": 0.048828125, |
|
"rewards/rejected": -0.150390625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2944062806673209, |
|
"grad_norm": 2.4663119079457614, |
|
"learning_rate": 4.445380514196192e-07, |
|
"logits/chosen": -1.09375, |
|
"logits/rejected": -1.171875, |
|
"logps/chosen": -428.0, |
|
"logps/chosen_bottom_tokens": -14.1875, |
|
"logps/chosen_top_tokens": -0.00087738037109375, |
|
"logps/rejected": -356.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.000858306884765625, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.6350000500679016, |
|
"rewards/chosen": -0.12890625, |
|
"rewards/margins": 0.0673828125, |
|
"rewards/rejected": -0.1962890625, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3140333660451423, |
|
"grad_norm": 2.455591342132379, |
|
"learning_rate": 4.33318500540218e-07, |
|
"logits/chosen": -1.0859375, |
|
"logits/rejected": -1.1328125, |
|
"logps/chosen": -408.0, |
|
"logps/chosen_bottom_tokens": -14.125, |
|
"logps/chosen_top_tokens": -0.00089263916015625, |
|
"logps/rejected": -368.0, |
|
"logps/rejected_bottom_tokens": -14.0625, |
|
"logps/rejected_top_tokens": -0.000904083251953125, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.6050000190734863, |
|
"rewards/chosen": -0.16796875, |
|
"rewards/margins": 0.078125, |
|
"rewards/rejected": -0.24609375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3336604514229637, |
|
"grad_norm": 2.436300399124971, |
|
"learning_rate": 4.2123675605892985e-07, |
|
"logits/chosen": -1.078125, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -422.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.00101470947265625, |
|
"logps/rejected": -364.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.00098419189453125, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.6450000405311584, |
|
"rewards/chosen": -0.1826171875, |
|
"rewards/margins": 0.115234375, |
|
"rewards/rejected": -0.296875, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.35328753680078506, |
|
"grad_norm": 2.5546008416763035, |
|
"learning_rate": 4.0834964149744333e-07, |
|
"logits/chosen": -1.1015625, |
|
"logits/rejected": -1.15625, |
|
"logps/chosen": -416.0, |
|
"logps/chosen_bottom_tokens": -14.25, |
|
"logps/chosen_top_tokens": -0.00093841552734375, |
|
"logps/rejected": -380.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.00099945068359375, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -0.232421875, |
|
"rewards/margins": 0.06396484375, |
|
"rewards/rejected": -0.296875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3729146221786065, |
|
"grad_norm": 2.555290762655567, |
|
"learning_rate": 3.947177682380738e-07, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -378.0, |
|
"logps/chosen_bottom_tokens": -14.25, |
|
"logps/chosen_top_tokens": -0.000789642333984375, |
|
"logps/rejected": -356.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.000823974609375, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.6450001001358032, |
|
"rewards/chosen": -0.2412109375, |
|
"rewards/margins": 0.11669921875, |
|
"rewards/rejected": -0.357421875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.39254170755642787, |
|
"grad_norm": 2.7737043586573313, |
|
"learning_rate": 3.804052504529933e-07, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.2265625, |
|
"logps/chosen": -392.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.0008392333984375, |
|
"logps/rejected": -370.0, |
|
"logps/rejected_bottom_tokens": -14.375, |
|
"logps/rejected_top_tokens": -0.00087738037109375, |
|
"loss": 0.645, |
|
"rewards/accuracies": 0.6350000500679016, |
|
"rewards/chosen": -0.271484375, |
|
"rewards/margins": 0.1396484375, |
|
"rewards/rejected": -0.41015625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39254170755642787, |
|
"eval_logits/chosen": -1.15625, |
|
"eval_logits/rejected": -1.203125, |
|
"eval_logps/chosen": -422.0, |
|
"eval_logps/chosen_bottom_tokens": -14.375, |
|
"eval_logps/chosen_top_tokens": -0.000911712646484375, |
|
"eval_logps/rejected": -380.0, |
|
"eval_logps/rejected_bottom_tokens": -14.3125, |
|
"eval_logps/rejected_top_tokens": -0.000919342041015625, |
|
"eval_loss": 0.6488671898841858, |
|
"eval_rewards/accuracies": 0.6447761058807373, |
|
"eval_rewards/chosen": -0.287109375, |
|
"eval_rewards/margins": 0.13671875, |
|
"eval_rewards/rejected": -0.423828125, |
|
"eval_runtime": 111.5112, |
|
"eval_samples_per_second": 17.935, |
|
"eval_steps_per_second": 0.601, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41216879293424924, |
|
"grad_norm": 2.8286672144445277, |
|
"learning_rate": 3.654794035589483e-07, |
|
"logits/chosen": -1.1328125, |
|
"logits/rejected": -1.1640625, |
|
"logps/chosen": -362.0, |
|
"logps/chosen_bottom_tokens": -14.25, |
|
"logps/chosen_top_tokens": -0.00091552734375, |
|
"logps/rejected": -344.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.000926971435546875, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.6149999499320984, |
|
"rewards/chosen": -0.298828125, |
|
"rewards/margins": 0.12060546875, |
|
"rewards/rejected": -0.419921875, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.43179587831207067, |
|
"grad_norm": 2.98579141751378, |
|
"learning_rate": 3.5001042761570826e-07, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -414.0, |
|
"logps/chosen_bottom_tokens": -14.5, |
|
"logps/chosen_top_tokens": -0.000762939453125, |
|
"logps/rejected": -398.0, |
|
"logps/rejected_bottom_tokens": -14.375, |
|
"logps/rejected_top_tokens": -0.000743865966796875, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": -0.333984375, |
|
"rewards/margins": 0.11279296875, |
|
"rewards/rejected": -0.447265625, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.45142296368989204, |
|
"grad_norm": 2.849801650804548, |
|
"learning_rate": 3.34071077157304e-07, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.2265625, |
|
"logps/chosen": -388.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.00075531005859375, |
|
"logps/rejected": -354.0, |
|
"logps/rejected_bottom_tokens": -14.3125, |
|
"logps/rejected_top_tokens": -0.000827789306640625, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -0.353515625, |
|
"rewards/margins": 0.1337890625, |
|
"rewards/rejected": -0.48828125, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.47105004906771347, |
|
"grad_norm": 3.020709895469043, |
|
"learning_rate": 3.1773631900892204e-07, |
|
"logits/chosen": -1.1484375, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -416.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.000759124755859375, |
|
"logps/rejected": -396.0, |
|
"logps/rejected_bottom_tokens": -14.375, |
|
"logps/rejected_top_tokens": -0.0007781982421875, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -0.38671875, |
|
"rewards/margins": 0.1337890625, |
|
"rewards/rejected": -0.51953125, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.49067713444553485, |
|
"grad_norm": 2.801068325901482, |
|
"learning_rate": 3.0108297969883103e-07, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -426.0, |
|
"logps/chosen_bottom_tokens": -14.4375, |
|
"logps/chosen_top_tokens": -0.0008392333984375, |
|
"logps/rejected": -398.0, |
|
"logps/rejected_bottom_tokens": -14.375, |
|
"logps/rejected_top_tokens": -0.00081634521484375, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.64000004529953, |
|
"rewards/chosen": -0.39453125, |
|
"rewards/margins": 0.1865234375, |
|
"rewards/rejected": -0.58203125, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5103042198233563, |
|
"grad_norm": 2.8119914001202835, |
|
"learning_rate": 2.8418938412365013e-07, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -396.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.000865936279296875, |
|
"logps/rejected": -372.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.000885009765625, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.6350000500679016, |
|
"rewards/chosen": -0.373046875, |
|
"rewards/margins": 0.171875, |
|
"rewards/rejected": -0.546875, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5299313052011776, |
|
"grad_norm": 2.914608701481186, |
|
"learning_rate": 2.671349871664101e-07, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -1.171875, |
|
"logps/chosen": -398.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.000782012939453125, |
|
"logps/rejected": -386.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.00077056884765625, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.64000004529953, |
|
"rewards/chosen": -0.40625, |
|
"rewards/margins": 0.17578125, |
|
"rewards/rejected": -0.58203125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.549558390578999, |
|
"grad_norm": 2.974677635397429, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -438.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.000911712646484375, |
|
"logps/rejected": -402.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.0009002685546875, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -0.42578125, |
|
"rewards/margins": 0.21484375, |
|
"rewards/rejected": -0.640625, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5691854759568205, |
|
"grad_norm": 3.4767790428686234, |
|
"learning_rate": 2.3286501283358982e-07, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -412.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.0008392333984375, |
|
"logps/rejected": -376.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.000885009765625, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": -0.4609375, |
|
"rewards/margins": 0.16015625, |
|
"rewards/rejected": -0.62109375, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5888125613346418, |
|
"grad_norm": 3.0983859451271565, |
|
"learning_rate": 2.1581061587634987e-07, |
|
"logits/chosen": -1.203125, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -428.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.00075531005859375, |
|
"logps/rejected": -388.0, |
|
"logps/rejected_bottom_tokens": -14.3125, |
|
"logps/rejected_top_tokens": -0.00079345703125, |
|
"loss": 0.6396, |
|
"rewards/accuracies": 0.5999999642372131, |
|
"rewards/chosen": -0.482421875, |
|
"rewards/margins": 0.162109375, |
|
"rewards/rejected": -0.64453125, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5888125613346418, |
|
"eval_logits/chosen": -1.1875, |
|
"eval_logits/rejected": -1.234375, |
|
"eval_logps/chosen": -438.0, |
|
"eval_logps/chosen_bottom_tokens": -14.375, |
|
"eval_logps/chosen_top_tokens": -0.0007476806640625, |
|
"eval_logps/rejected": -406.0, |
|
"eval_logps/rejected_bottom_tokens": -14.3125, |
|
"eval_logps/rejected_top_tokens": -0.000759124755859375, |
|
"eval_loss": 0.6303857564926147, |
|
"eval_rewards/accuracies": 0.6626865863800049, |
|
"eval_rewards/chosen": -0.451171875, |
|
"eval_rewards/margins": 0.2275390625, |
|
"eval_rewards/rejected": -0.6796875, |
|
"eval_runtime": 111.5027, |
|
"eval_samples_per_second": 17.937, |
|
"eval_steps_per_second": 0.601, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6084396467124632, |
|
"grad_norm": 3.1412458629194835, |
|
"learning_rate": 1.9891702030116897e-07, |
|
"logits/chosen": -1.140625, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -446.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.00074005126953125, |
|
"logps/rejected": -358.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.0007171630859375, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.6949999928474426, |
|
"rewards/chosen": -0.408203125, |
|
"rewards/margins": 0.2451171875, |
|
"rewards/rejected": -0.65625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6280667320902846, |
|
"grad_norm": 3.1923082526436986, |
|
"learning_rate": 1.8226368099107792e-07, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -424.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.00080108642578125, |
|
"logps/rejected": -364.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.000843048095703125, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.42578125, |
|
"rewards/margins": 0.2216796875, |
|
"rewards/rejected": -0.6484375, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.647693817468106, |
|
"grad_norm": 3.064211696764281, |
|
"learning_rate": 1.6592892284269594e-07, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -408.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.0007171630859375, |
|
"logps/rejected": -386.0, |
|
"logps/rejected_bottom_tokens": -14.3125, |
|
"logps/rejected_top_tokens": -0.000732421875, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.6799999475479126, |
|
"rewards/chosen": -0.431640625, |
|
"rewards/margins": 0.259765625, |
|
"rewards/rejected": -0.69140625, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6673209028459274, |
|
"grad_norm": 3.1791023826814353, |
|
"learning_rate": 1.4998957238429172e-07, |
|
"logits/chosen": -1.21875, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -408.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.000804901123046875, |
|
"logps/rejected": -380.0, |
|
"logps/rejected_bottom_tokens": -14.3125, |
|
"logps/rejected_top_tokens": -0.00077056884765625, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.6300000548362732, |
|
"rewards/chosen": -0.5078125, |
|
"rewards/margins": 0.244140625, |
|
"rewards/rejected": -0.75390625, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6869479882237488, |
|
"grad_norm": 3.295570474728778, |
|
"learning_rate": 1.345205964410517e-07, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -392.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.000858306884765625, |
|
"logps/rejected": -372.0, |
|
"logps/rejected_bottom_tokens": -14.3125, |
|
"logps/rejected_top_tokens": -0.00089263916015625, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.5850000381469727, |
|
"rewards/chosen": -0.49609375, |
|
"rewards/margins": 0.2236328125, |
|
"rewards/rejected": -0.71875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7065750736015701, |
|
"grad_norm": 3.5211819482445184, |
|
"learning_rate": 1.1959474954700665e-07, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -424.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.00067138671875, |
|
"logps/rejected": -416.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.000675201416015625, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.7049999833106995, |
|
"rewards/chosen": -0.44140625, |
|
"rewards/margins": 0.234375, |
|
"rewards/rejected": -0.67578125, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7262021589793916, |
|
"grad_norm": 3.3333877037469026, |
|
"learning_rate": 1.0528223176192615e-07, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.234375, |
|
"logps/chosen": -442.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.00069427490234375, |
|
"logps/rejected": -398.0, |
|
"logps/rejected_bottom_tokens": -14.375, |
|
"logps/rejected_top_tokens": -0.00067901611328125, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": -0.5234375, |
|
"rewards/margins": 0.2138671875, |
|
"rewards/rejected": -0.73828125, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.745829244357213, |
|
"grad_norm": 3.3039144354882657, |
|
"learning_rate": 9.16503585025567e-08, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -420.0, |
|
"logps/chosen_bottom_tokens": -14.25, |
|
"logps/chosen_top_tokens": -0.00090789794921875, |
|
"logps/rejected": -412.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.00104522705078125, |
|
"loss": 0.6279, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5, |
|
"rewards/margins": 0.2041015625, |
|
"rewards/rejected": -0.703125, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7654563297350343, |
|
"grad_norm": 3.460907844274303, |
|
"learning_rate": 7.876324394107017e-08, |
|
"logits/chosen": -1.15625, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -442.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.00067901611328125, |
|
"logps/rejected": -418.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.00069427490234375, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.6350000500679016, |
|
"rewards/chosen": -0.50390625, |
|
"rewards/margins": 0.2255859375, |
|
"rewards/rejected": -0.7265625, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7850834151128557, |
|
"grad_norm": 3.2842912290921897, |
|
"learning_rate": 6.668149945978201e-08, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.2265625, |
|
"logps/chosen": -440.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.00070953369140625, |
|
"logps/rejected": -420.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.000732421875, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -0.482421875, |
|
"rewards/margins": 0.291015625, |
|
"rewards/rejected": -0.7734375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7850834151128557, |
|
"eval_logits/chosen": -1.1875, |
|
"eval_logits/rejected": -1.234375, |
|
"eval_logps/chosen": -444.0, |
|
"eval_logps/chosen_bottom_tokens": -14.3125, |
|
"eval_logps/chosen_top_tokens": -0.00067138671875, |
|
"eval_logps/rejected": -414.0, |
|
"eval_logps/rejected_bottom_tokens": -14.25, |
|
"eval_logps/rejected_top_tokens": -0.00066375732421875, |
|
"eval_loss": 0.6267920136451721, |
|
"eval_rewards/accuracies": 0.6567164063453674, |
|
"eval_rewards/chosen": -0.50390625, |
|
"eval_rewards/margins": 0.2578125, |
|
"eval_rewards/rejected": -0.76171875, |
|
"eval_runtime": 111.5791, |
|
"eval_samples_per_second": 17.925, |
|
"eval_steps_per_second": 0.6, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8047105004906772, |
|
"grad_norm": 3.3007954730404303, |
|
"learning_rate": 5.546194858038072e-08, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -416.0, |
|
"logps/chosen_bottom_tokens": -14.3125, |
|
"logps/chosen_top_tokens": -0.000766754150390625, |
|
"logps/rejected": -374.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.000827789306640625, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.6699999570846558, |
|
"rewards/chosen": -0.51171875, |
|
"rewards/margins": 0.28515625, |
|
"rewards/rejected": -0.796875, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8243375858684985, |
|
"grad_norm": 3.9743687860867185, |
|
"learning_rate": 4.5157359708432626e-08, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.2265625, |
|
"logps/chosen": -394.0, |
|
"logps/chosen_bottom_tokens": -14.25, |
|
"logps/chosen_top_tokens": -0.00075531005859375, |
|
"logps/rejected": -412.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.0007476806640625, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.474609375, |
|
"rewards/margins": 0.234375, |
|
"rewards/rejected": -0.70703125, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8439646712463199, |
|
"grad_norm": 3.1969688623984633, |
|
"learning_rate": 3.581619795012874e-08, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -400.0, |
|
"logps/chosen_bottom_tokens": -14.1875, |
|
"logps/chosen_top_tokens": -0.000782012939453125, |
|
"logps/rejected": -404.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.000743865966796875, |
|
"loss": 0.6208, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.474609375, |
|
"rewards/margins": 0.267578125, |
|
"rewards/rejected": -0.7421875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8635917566241413, |
|
"grad_norm": 3.705663203159775, |
|
"learning_rate": 2.748239716854589e-08, |
|
"logits/chosen": -1.2109375, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -424.0, |
|
"logps/chosen_bottom_tokens": -14.1875, |
|
"logps/chosen_top_tokens": -0.000728607177734375, |
|
"logps/rejected": -420.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.000751495361328125, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.5849999785423279, |
|
"rewards/chosen": -0.51171875, |
|
"rewards/margins": 0.1435546875, |
|
"rewards/rejected": -0.65625, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8832188420019627, |
|
"grad_norm": 3.9792023056235455, |
|
"learning_rate": 2.0195153351498323e-08, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -432.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.000732421875, |
|
"logps/rejected": -420.0, |
|
"logps/rejected_bottom_tokens": -14.3125, |
|
"logps/rejected_top_tokens": -0.000705718994140625, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.6149999499320984, |
|
"rewards/chosen": -0.53125, |
|
"rewards/margins": 0.2421875, |
|
"rewards/rejected": -0.7734375, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9028459273797841, |
|
"grad_norm": 3.598443005581659, |
|
"learning_rate": 1.3988740262822846e-08, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -428.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.00067138671875, |
|
"logps/rejected": -410.0, |
|
"logps/rejected_bottom_tokens": -14.3125, |
|
"logps/rejected_top_tokens": -0.000667572021484375, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.6349999904632568, |
|
"rewards/chosen": -0.490234375, |
|
"rewards/margins": 0.216796875, |
|
"rewards/rejected": -0.70703125, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9224730127576055, |
|
"grad_norm": 3.423571391469107, |
|
"learning_rate": 8.892348244137788e-09, |
|
"logits/chosen": -1.1875, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -474.0, |
|
"logps/chosen_bottom_tokens": -14.5625, |
|
"logps/chosen_top_tokens": -0.000675201416015625, |
|
"logps/rejected": -444.0, |
|
"logps/rejected_bottom_tokens": -14.375, |
|
"logps/rejected_top_tokens": -0.000652313232421875, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -0.494140625, |
|
"rewards/margins": 0.271484375, |
|
"rewards/rejected": -0.765625, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9421000981354269, |
|
"grad_norm": 3.1667123948106584, |
|
"learning_rate": 4.929946925231076e-09, |
|
"logits/chosen": -1.1328125, |
|
"logits/rejected": -1.171875, |
|
"logps/chosen": -410.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.0007476806640625, |
|
"logps/rejected": -412.0, |
|
"logps/rejected_bottom_tokens": -14.25, |
|
"logps/rejected_top_tokens": -0.00095367431640625, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.6049999594688416, |
|
"rewards/chosen": -0.48046875, |
|
"rewards/margins": 0.1953125, |
|
"rewards/rejected": -0.67578125, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9617271835132483, |
|
"grad_norm": 3.5902417143779024, |
|
"learning_rate": 2.1201724887858484e-09, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -1.171875, |
|
"logps/chosen": -422.0, |
|
"logps/chosen_bottom_tokens": -14.25, |
|
"logps/chosen_top_tokens": -0.00072479248046875, |
|
"logps/rejected": -412.0, |
|
"logps/rejected_bottom_tokens": -14.1875, |
|
"logps/rejected_top_tokens": -0.000743865966796875, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.5949999690055847, |
|
"rewards/chosen": -0.5390625, |
|
"rewards/margins": 0.2265625, |
|
"rewards/rejected": -0.765625, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9813542688910697, |
|
"grad_norm": 3.3154898943344704, |
|
"learning_rate": 4.762400196664518e-10, |
|
"logits/chosen": -1.1484375, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -428.0, |
|
"logps/chosen_bottom_tokens": -14.375, |
|
"logps/chosen_top_tokens": -0.000621795654296875, |
|
"logps/rejected": -388.0, |
|
"logps/rejected_bottom_tokens": -14.3125, |
|
"logps/rejected_top_tokens": -0.0006256103515625, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.578125, |
|
"rewards/margins": 0.1787109375, |
|
"rewards/rejected": -0.75390625, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9813542688910697, |
|
"eval_logits/chosen": -1.1953125, |
|
"eval_logits/rejected": -1.2421875, |
|
"eval_logps/chosen": -446.0, |
|
"eval_logps/chosen_bottom_tokens": -14.375, |
|
"eval_logps/chosen_top_tokens": -0.000743865966796875, |
|
"eval_logps/rejected": -416.0, |
|
"eval_logps/rejected_bottom_tokens": -14.3125, |
|
"eval_logps/rejected_top_tokens": -0.0007476806640625, |
|
"eval_loss": 0.6259472370147705, |
|
"eval_rewards/accuracies": 0.6567164659500122, |
|
"eval_rewards/chosen": -0.5234375, |
|
"eval_rewards/margins": 0.26171875, |
|
"eval_rewards/rejected": -0.78515625, |
|
"eval_runtime": 111.4505, |
|
"eval_samples_per_second": 17.945, |
|
"eval_steps_per_second": 0.601, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9990186457311089, |
|
"step": 509, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6464882252961105, |
|
"train_runtime": 8284.9703, |
|
"train_samples_per_second": 7.379, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 509, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|