|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 74.50819179863889, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.7660439014434814, |
|
"logits/rejected": -2.717564582824707, |
|
"logps/chosen": -269.8568420410156, |
|
"logps/rejected": -360.52459716796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 71.5827858042053, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.592801809310913, |
|
"logits/rejected": -2.5633366107940674, |
|
"logps/chosen": -264.5331726074219, |
|
"logps/rejected": -251.33367919921875, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.2647041380405426, |
|
"rewards/margins": 0.0454571396112442, |
|
"rewards/rejected": 0.2192470282316208, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 33.37630632393394, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.6635663509368896, |
|
"logits/rejected": -2.6177525520324707, |
|
"logps/chosen": -275.1928405761719, |
|
"logps/rejected": -290.4365539550781, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 6.3604888916015625, |
|
"rewards/margins": -0.009852093644440174, |
|
"rewards/rejected": 6.370340824127197, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 22.1278736890366, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.7272486686706543, |
|
"logits/rejected": -2.667067527770996, |
|
"logps/chosen": -285.1613464355469, |
|
"logps/rejected": -249.3108367919922, |
|
"loss": 0.4416, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 15.510467529296875, |
|
"rewards/margins": 0.8711569905281067, |
|
"rewards/rejected": 14.639310836791992, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 17.071895487907064, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.6888694763183594, |
|
"logits/rejected": -2.6701016426086426, |
|
"logps/chosen": -247.84716796875, |
|
"logps/rejected": -227.38131713867188, |
|
"loss": 0.3982, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 19.278215408325195, |
|
"rewards/margins": 2.267552137374878, |
|
"rewards/rejected": 17.010662078857422, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 14.78162706214556, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.659508466720581, |
|
"logits/rejected": -2.6249804496765137, |
|
"logps/chosen": -259.9454650878906, |
|
"logps/rejected": -272.14227294921875, |
|
"loss": 0.3676, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 19.786420822143555, |
|
"rewards/margins": -0.8553922772407532, |
|
"rewards/rejected": 20.64181137084961, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 14.285832773490087, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.6977388858795166, |
|
"logits/rejected": -2.654181957244873, |
|
"logps/chosen": -247.1780242919922, |
|
"logps/rejected": -275.7373962402344, |
|
"loss": 0.3521, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 24.428516387939453, |
|
"rewards/margins": 2.0845706462860107, |
|
"rewards/rejected": 22.343944549560547, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 14.416469937136577, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.6118428707122803, |
|
"logits/rejected": -2.625479221343994, |
|
"logps/chosen": -239.4540252685547, |
|
"logps/rejected": -232.90463256835938, |
|
"loss": 0.3304, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 26.162424087524414, |
|
"rewards/margins": 2.349818706512451, |
|
"rewards/rejected": 23.812606811523438, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 15.840881084472352, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.7612788677215576, |
|
"logits/rejected": -2.7243030071258545, |
|
"logps/chosen": -295.0336608886719, |
|
"logps/rejected": -240.8730010986328, |
|
"loss": 0.3248, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 27.784252166748047, |
|
"rewards/margins": 4.598628997802734, |
|
"rewards/rejected": 23.185623168945312, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 13.661268677283298, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.6661014556884766, |
|
"logits/rejected": -2.645249128341675, |
|
"logps/chosen": -231.57553100585938, |
|
"logps/rejected": -228.09091186523438, |
|
"loss": 0.3223, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 27.535770416259766, |
|
"rewards/margins": 3.228619337081909, |
|
"rewards/rejected": 24.30714988708496, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 11.61288143003843, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.6386702060699463, |
|
"logits/rejected": -2.6339759826660156, |
|
"logps/chosen": -233.39047241210938, |
|
"logps/rejected": -232.5922393798828, |
|
"loss": 0.3163, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 26.968032836914062, |
|
"rewards/margins": 2.5318057537078857, |
|
"rewards/rejected": 24.436227798461914, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.6968541145324707, |
|
"eval_logits/rejected": -2.670072555541992, |
|
"eval_logps/chosen": -235.37875366210938, |
|
"eval_logps/rejected": -238.44345092773438, |
|
"eval_loss": 0.31289389729499817, |
|
"eval_rewards/accuracies": 0.58203125, |
|
"eval_rewards/chosen": 27.21471405029297, |
|
"eval_rewards/margins": 2.99098801612854, |
|
"eval_rewards/rejected": 24.223726272583008, |
|
"eval_runtime": 96.735, |
|
"eval_samples_per_second": 20.675, |
|
"eval_steps_per_second": 0.331, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.688620320219954, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.7125041484832764, |
|
"logits/rejected": -2.6624934673309326, |
|
"logps/chosen": -276.029052734375, |
|
"logps/rejected": -234.1141815185547, |
|
"loss": 0.3136, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 28.551036834716797, |
|
"rewards/margins": 4.829342842102051, |
|
"rewards/rejected": 23.72169303894043, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 14.849649400244427, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -2.6516470909118652, |
|
"logits/rejected": -2.647688865661621, |
|
"logps/chosen": -253.4019317626953, |
|
"logps/rejected": -234.5045623779297, |
|
"loss": 0.3065, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 27.765233993530273, |
|
"rewards/margins": 2.4132068157196045, |
|
"rewards/rejected": 25.352027893066406, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 12.095477452171375, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -2.679412364959717, |
|
"logits/rejected": -2.6742541790008545, |
|
"logps/chosen": -249.6054229736328, |
|
"logps/rejected": -241.8912811279297, |
|
"loss": 0.2993, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 32.39772415161133, |
|
"rewards/margins": 5.853152275085449, |
|
"rewards/rejected": 26.544570922851562, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 13.237989201417717, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -2.7010607719421387, |
|
"logits/rejected": -2.689103603363037, |
|
"logps/chosen": -284.6669921875, |
|
"logps/rejected": -270.44970703125, |
|
"loss": 0.3016, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 31.298425674438477, |
|
"rewards/margins": 1.071274995803833, |
|
"rewards/rejected": 30.22715187072754, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 11.533759549255185, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -2.6910300254821777, |
|
"logits/rejected": -2.6623480319976807, |
|
"logps/chosen": -251.215576171875, |
|
"logps/rejected": -248.98348999023438, |
|
"loss": 0.2985, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 32.008628845214844, |
|
"rewards/margins": 4.783123970031738, |
|
"rewards/rejected": 27.225509643554688, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 13.117822478323479, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -2.721895217895508, |
|
"logits/rejected": -2.675842523574829, |
|
"logps/chosen": -242.4053192138672, |
|
"logps/rejected": -230.8060302734375, |
|
"loss": 0.3009, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 30.662723541259766, |
|
"rewards/margins": 4.044883728027344, |
|
"rewards/rejected": 26.61783790588379, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 11.340151801902158, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -2.670436382293701, |
|
"logits/rejected": -2.632450819015503, |
|
"logps/chosen": -220.5222625732422, |
|
"logps/rejected": -204.80908203125, |
|
"loss": 0.2938, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 28.81294822692871, |
|
"rewards/margins": 2.497253179550171, |
|
"rewards/rejected": 26.31569480895996, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 11.477634324684333, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -2.646768093109131, |
|
"logits/rejected": -2.6306955814361572, |
|
"logps/chosen": -225.45016479492188, |
|
"logps/rejected": -200.42015075683594, |
|
"loss": 0.2914, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 30.908817291259766, |
|
"rewards/margins": 3.7578415870666504, |
|
"rewards/rejected": 27.150976181030273, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 13.566633133843082, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -2.679771900177002, |
|
"logits/rejected": -2.6499440670013428, |
|
"logps/chosen": -241.45156860351562, |
|
"logps/rejected": -231.2630615234375, |
|
"loss": 0.2963, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 29.79128646850586, |
|
"rewards/margins": 1.2995483875274658, |
|
"rewards/rejected": 28.49173927307129, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 16.736011308973627, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -2.630007028579712, |
|
"logits/rejected": -2.6183559894561768, |
|
"logps/chosen": -230.09048461914062, |
|
"logps/rejected": -223.8180694580078, |
|
"loss": 0.2918, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 29.806177139282227, |
|
"rewards/margins": 1.575269341468811, |
|
"rewards/rejected": 28.230907440185547, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.708475112915039, |
|
"eval_logits/rejected": -2.682575225830078, |
|
"eval_logps/chosen": -232.24124145507812, |
|
"eval_logps/rejected": -236.21038818359375, |
|
"eval_loss": 0.29230329394340515, |
|
"eval_rewards/accuracies": 0.58203125, |
|
"eval_rewards/chosen": 30.35222816467285, |
|
"eval_rewards/margins": 3.8954334259033203, |
|
"eval_rewards/rejected": 26.45679473876953, |
|
"eval_runtime": 96.829, |
|
"eval_samples_per_second": 20.655, |
|
"eval_steps_per_second": 0.33, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 11.417465496451523, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -2.6330389976501465, |
|
"logits/rejected": -2.6055209636688232, |
|
"logps/chosen": -257.6673889160156, |
|
"logps/rejected": -225.943359375, |
|
"loss": 0.2902, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 32.02475357055664, |
|
"rewards/margins": 6.720486640930176, |
|
"rewards/rejected": 25.304264068603516, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 12.04727391696027, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -2.5957412719726562, |
|
"logits/rejected": -2.5795822143554688, |
|
"logps/chosen": -233.29476928710938, |
|
"logps/rejected": -217.3531951904297, |
|
"loss": 0.2919, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 32.082313537597656, |
|
"rewards/margins": 3.7717392444610596, |
|
"rewards/rejected": 28.310577392578125, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 11.505656123665526, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -2.6124305725097656, |
|
"logits/rejected": -2.5944228172302246, |
|
"logps/chosen": -217.5354461669922, |
|
"logps/rejected": -220.5460205078125, |
|
"loss": 0.3047, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 31.32999038696289, |
|
"rewards/margins": 4.138183116912842, |
|
"rewards/rejected": 27.19180679321289, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 11.083392566284138, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -2.660727024078369, |
|
"logits/rejected": -2.6385245323181152, |
|
"logps/chosen": -232.0665740966797, |
|
"logps/rejected": -219.62210083007812, |
|
"loss": 0.2834, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 31.633642196655273, |
|
"rewards/margins": 2.1873562335968018, |
|
"rewards/rejected": 29.446285247802734, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 11.463127161742676, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -2.6206917762756348, |
|
"logits/rejected": -2.576387405395508, |
|
"logps/chosen": -264.06439208984375, |
|
"logps/rejected": -229.7786865234375, |
|
"loss": 0.2818, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 34.12608337402344, |
|
"rewards/margins": 4.382205009460449, |
|
"rewards/rejected": 29.743881225585938, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 10.661524920447267, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -2.6774675846099854, |
|
"logits/rejected": -2.668527364730835, |
|
"logps/chosen": -260.33514404296875, |
|
"logps/rejected": -225.80810546875, |
|
"loss": 0.2858, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 33.976402282714844, |
|
"rewards/margins": 5.804098606109619, |
|
"rewards/rejected": 28.17230224609375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 11.916616915089687, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -2.6731224060058594, |
|
"logits/rejected": -2.6551766395568848, |
|
"logps/chosen": -245.6435089111328, |
|
"logps/rejected": -228.1649932861328, |
|
"loss": 0.2808, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 31.35245704650879, |
|
"rewards/margins": 1.8731645345687866, |
|
"rewards/rejected": 29.479290008544922, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 11.982078860289866, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -2.6452529430389404, |
|
"logits/rejected": -2.6127915382385254, |
|
"logps/chosen": -229.02554321289062, |
|
"logps/rejected": -215.188720703125, |
|
"loss": 0.2835, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 32.651554107666016, |
|
"rewards/margins": 5.653929233551025, |
|
"rewards/rejected": 26.99761962890625, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 11.17239233559609, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -2.675553321838379, |
|
"logits/rejected": -2.662069082260132, |
|
"logps/chosen": -219.8170928955078, |
|
"logps/rejected": -211.7806396484375, |
|
"loss": 0.2849, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 31.27024269104004, |
|
"rewards/margins": 1.0949894189834595, |
|
"rewards/rejected": 30.175247192382812, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 9.847053265544167, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -2.65397572517395, |
|
"logits/rejected": -2.6134414672851562, |
|
"logps/chosen": -268.84588623046875, |
|
"logps/rejected": -232.80752563476562, |
|
"loss": 0.286, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 32.15021514892578, |
|
"rewards/margins": 4.852233409881592, |
|
"rewards/rejected": 27.297988891601562, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.695726156234741, |
|
"eval_logits/rejected": -2.6716713905334473, |
|
"eval_logps/chosen": -231.15402221679688, |
|
"eval_logps/rejected": -235.42864990234375, |
|
"eval_loss": 0.29209351539611816, |
|
"eval_rewards/accuracies": 0.58203125, |
|
"eval_rewards/chosen": 31.439437866210938, |
|
"eval_rewards/margins": 4.200903415679932, |
|
"eval_rewards/rejected": 27.238534927368164, |
|
"eval_runtime": 96.789, |
|
"eval_samples_per_second": 20.664, |
|
"eval_steps_per_second": 0.331, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 11.299461074514115, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -2.6087942123413086, |
|
"logits/rejected": -2.607959270477295, |
|
"logps/chosen": -263.2939758300781, |
|
"logps/rejected": -229.5752716064453, |
|
"loss": 0.2804, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 32.041908264160156, |
|
"rewards/margins": 3.100654363632202, |
|
"rewards/rejected": 28.941247940063477, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 11.979925902064297, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -2.652468204498291, |
|
"logits/rejected": -2.6433398723602295, |
|
"logps/chosen": -260.83233642578125, |
|
"logps/rejected": -216.2664337158203, |
|
"loss": 0.2788, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 32.8377571105957, |
|
"rewards/margins": 4.280916213989258, |
|
"rewards/rejected": 28.556838989257812, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 10.289416601586245, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -2.678496837615967, |
|
"logits/rejected": -2.634920835494995, |
|
"logps/chosen": -229.55624389648438, |
|
"logps/rejected": -231.64407348632812, |
|
"loss": 0.2812, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 32.6539306640625, |
|
"rewards/margins": 4.799349784851074, |
|
"rewards/rejected": 27.854583740234375, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 12.940304501019066, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -2.687782049179077, |
|
"logits/rejected": -2.6474757194519043, |
|
"logps/chosen": -258.529541015625, |
|
"logps/rejected": -247.69125366210938, |
|
"loss": 0.2752, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 31.464908599853516, |
|
"rewards/margins": -0.8856052160263062, |
|
"rewards/rejected": 32.35051727294922, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 13.446019747621028, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -2.6681811809539795, |
|
"logits/rejected": -2.6358139514923096, |
|
"logps/chosen": -227.58425903320312, |
|
"logps/rejected": -212.9467010498047, |
|
"loss": 0.2866, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 30.626983642578125, |
|
"rewards/margins": 2.8648905754089355, |
|
"rewards/rejected": 27.7620906829834, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 10.212615361555141, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -2.691338300704956, |
|
"logits/rejected": -2.6329030990600586, |
|
"logps/chosen": -269.2547302246094, |
|
"logps/rejected": -233.14053344726562, |
|
"loss": 0.2785, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 33.437278747558594, |
|
"rewards/margins": 5.27285623550415, |
|
"rewards/rejected": 28.1644287109375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 12.701608094493194, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -2.6507325172424316, |
|
"logits/rejected": -2.6226696968078613, |
|
"logps/chosen": -243.0960693359375, |
|
"logps/rejected": -207.664794921875, |
|
"loss": 0.2854, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 32.23695373535156, |
|
"rewards/margins": 1.6676933765411377, |
|
"rewards/rejected": 30.569263458251953, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 11.004484883830752, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -2.592874526977539, |
|
"logits/rejected": -2.5939741134643555, |
|
"logps/chosen": -206.689697265625, |
|
"logps/rejected": -228.67898559570312, |
|
"loss": 0.2774, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 32.79497146606445, |
|
"rewards/margins": 2.7575299739837646, |
|
"rewards/rejected": 30.037445068359375, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 12.608909298282311, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -2.6360385417938232, |
|
"logits/rejected": -2.6261894702911377, |
|
"logps/chosen": -270.9910888671875, |
|
"logps/rejected": -252.8332977294922, |
|
"loss": 0.276, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 34.645816802978516, |
|
"rewards/margins": 3.508648633956909, |
|
"rewards/rejected": 31.137165069580078, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 11.347134923103408, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -2.632523775100708, |
|
"logits/rejected": -2.594832181930542, |
|
"logps/chosen": -236.8807830810547, |
|
"logps/rejected": -237.6399688720703, |
|
"loss": 0.2819, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 33.6544189453125, |
|
"rewards/margins": 4.281933784484863, |
|
"rewards/rejected": 29.372488021850586, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.6868975162506104, |
|
"eval_logits/rejected": -2.66192626953125, |
|
"eval_logps/chosen": -230.7387237548828, |
|
"eval_logps/rejected": -235.19105529785156, |
|
"eval_loss": 0.2787904143333435, |
|
"eval_rewards/accuracies": 0.578125, |
|
"eval_rewards/chosen": 31.854747772216797, |
|
"eval_rewards/margins": 4.3786234855651855, |
|
"eval_rewards/rejected": 27.476125717163086, |
|
"eval_runtime": 96.6885, |
|
"eval_samples_per_second": 20.685, |
|
"eval_steps_per_second": 0.331, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 12.175943173191595, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -2.6695199012756348, |
|
"logits/rejected": -2.626798152923584, |
|
"logps/chosen": -263.4989318847656, |
|
"logps/rejected": -240.9721221923828, |
|
"loss": 0.2806, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 35.418556213378906, |
|
"rewards/margins": 7.573515892028809, |
|
"rewards/rejected": 27.845043182373047, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 11.7624491150407, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -2.6308817863464355, |
|
"logits/rejected": -2.620222568511963, |
|
"logps/chosen": -264.280517578125, |
|
"logps/rejected": -247.2097625732422, |
|
"loss": 0.2882, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 32.79326248168945, |
|
"rewards/margins": 5.5407843589782715, |
|
"rewards/rejected": 27.252477645874023, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 11.16296113559481, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -2.577580213546753, |
|
"logits/rejected": -2.5429909229278564, |
|
"logps/chosen": -248.5481719970703, |
|
"logps/rejected": -228.4681396484375, |
|
"loss": 0.2851, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 30.489971160888672, |
|
"rewards/margins": 1.1781085729599, |
|
"rewards/rejected": 29.311859130859375, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 10.453636294498436, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -2.654780864715576, |
|
"logits/rejected": -2.619481086730957, |
|
"logps/chosen": -251.1508026123047, |
|
"logps/rejected": -240.0060272216797, |
|
"loss": 0.2805, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 33.18633270263672, |
|
"rewards/margins": 4.080627918243408, |
|
"rewards/rejected": 29.1057071685791, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 10.779162534358996, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -2.598240375518799, |
|
"logits/rejected": -2.6028037071228027, |
|
"logps/chosen": -259.9753112792969, |
|
"logps/rejected": -276.95166015625, |
|
"loss": 0.2836, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 33.70884323120117, |
|
"rewards/margins": 3.5860488414764404, |
|
"rewards/rejected": 30.122793197631836, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 12.07874608208951, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -2.6384501457214355, |
|
"logits/rejected": -2.618943452835083, |
|
"logps/chosen": -240.47885131835938, |
|
"logps/rejected": -213.6422882080078, |
|
"loss": 0.2815, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 31.485698699951172, |
|
"rewards/margins": 2.44018292427063, |
|
"rewards/rejected": 29.045513153076172, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 11.390948919388384, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -2.6327641010284424, |
|
"logits/rejected": -2.6079437732696533, |
|
"logps/chosen": -245.8006591796875, |
|
"logps/rejected": -253.76730346679688, |
|
"loss": 0.2778, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 32.898033142089844, |
|
"rewards/margins": 4.314266204833984, |
|
"rewards/rejected": 28.58376121520996, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.31381568898715734, |
|
"train_runtime": 7749.4814, |
|
"train_samples_per_second": 7.889, |
|
"train_steps_per_second": 0.062 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|