{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984301412872841,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006279434850863423,
"grad_norm": 5.557445139643298,
"learning_rate": 3.125e-08,
"logits/chosen": 0.18015038967132568,
"logits/rejected": 0.2519298493862152,
"logps/chosen": -297.10906982421875,
"logps/pi_response": -130.58929443359375,
"logps/ref_response": -130.58929443359375,
"logps/rejected": -316.44769287109375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06279434850863422,
"grad_norm": 5.880023460230945,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": 0.16638809442520142,
"logits/rejected": 0.3159521222114563,
"logps/chosen": -243.77159118652344,
"logps/pi_response": -120.18633270263672,
"logps/ref_response": -120.15902709960938,
"logps/rejected": -281.09716796875,
"loss": 0.6928,
"rewards/accuracies": 0.4444444477558136,
"rewards/chosen": -0.0007160517852753401,
"rewards/margins": 0.00040519109461456537,
"rewards/rejected": -0.0011212429963052273,
"step": 10
},
{
"epoch": 0.12558869701726844,
"grad_norm": 6.011032264913819,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": 0.1316775530576706,
"logits/rejected": 0.32217010855674744,
"logps/chosen": -244.0759735107422,
"logps/pi_response": -121.6043701171875,
"logps/ref_response": -121.85536193847656,
"logps/rejected": -266.6847229003906,
"loss": 0.6884,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.011644470505416393,
"rewards/margins": 0.011915634386241436,
"rewards/rejected": -0.02356010302901268,
"step": 20
},
{
"epoch": 0.18838304552590268,
"grad_norm": 6.296068063682766,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": 0.17213600873947144,
"logits/rejected": 0.3042981028556824,
"logps/chosen": -244.4438018798828,
"logps/pi_response": -109.73341369628906,
"logps/ref_response": -110.8894271850586,
"logps/rejected": -290.1441650390625,
"loss": 0.6685,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.043184880167245865,
"rewards/margins": 0.054663728922605515,
"rewards/rejected": -0.09784860908985138,
"step": 30
},
{
"epoch": 0.25117739403453687,
"grad_norm": 5.762840753548806,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": 0.2107941210269928,
"logits/rejected": 0.39838385581970215,
"logps/chosen": -287.46002197265625,
"logps/pi_response": -125.36665344238281,
"logps/ref_response": -129.86325073242188,
"logps/rejected": -316.40423583984375,
"loss": 0.6349,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.08985555917024612,
"rewards/margins": 0.1335289627313614,
"rewards/rejected": -0.22338449954986572,
"step": 40
},
{
"epoch": 0.3139717425431711,
"grad_norm": 5.876442100853928,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": 0.3732234835624695,
"logits/rejected": 0.5105798840522766,
"logps/chosen": -247.15914916992188,
"logps/pi_response": -109.07597351074219,
"logps/ref_response": -116.5090560913086,
"logps/rejected": -310.7102966308594,
"loss": 0.606,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.11287806183099747,
"rewards/margins": 0.24187734723091125,
"rewards/rejected": -0.3547554314136505,
"step": 50
},
{
"epoch": 0.37676609105180536,
"grad_norm": 9.315356312481537,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": 0.5162631273269653,
"logits/rejected": 0.6798213124275208,
"logps/chosen": -268.0201721191406,
"logps/pi_response": -116.46971130371094,
"logps/ref_response": -119.4989242553711,
"logps/rejected": -347.75079345703125,
"loss": 0.5814,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.20100387930870056,
"rewards/margins": 0.41021671891212463,
"rewards/rejected": -0.6112205386161804,
"step": 60
},
{
"epoch": 0.43956043956043955,
"grad_norm": 7.144146562974543,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": 0.6009117960929871,
"logits/rejected": 0.767359733581543,
"logps/chosen": -236.7908172607422,
"logps/pi_response": -114.82955169677734,
"logps/ref_response": -116.70068359375,
"logps/rejected": -368.27850341796875,
"loss": 0.5568,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.2643741965293884,
"rewards/margins": 0.5534776449203491,
"rewards/rejected": -0.8178518414497375,
"step": 70
},
{
"epoch": 0.5023547880690737,
"grad_norm": 7.105190568030611,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": 0.47289925813674927,
"logits/rejected": 0.8142975568771362,
"logps/chosen": -325.69622802734375,
"logps/pi_response": -129.21812438964844,
"logps/ref_response": -127.53900146484375,
"logps/rejected": -348.5531311035156,
"loss": 0.5849,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.3693317174911499,
"rewards/margins": 0.37722334265708923,
"rewards/rejected": -0.7465550303459167,
"step": 80
},
{
"epoch": 0.565149136577708,
"grad_norm": 6.295924145315453,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": 0.43639689683914185,
"logits/rejected": 0.8138043284416199,
"logps/chosen": -301.4584045410156,
"logps/pi_response": -135.66107177734375,
"logps/ref_response": -129.38760375976562,
"logps/rejected": -421.0269470214844,
"loss": 0.5432,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.29649025201797485,
"rewards/margins": 0.6172370910644531,
"rewards/rejected": -0.9137271642684937,
"step": 90
},
{
"epoch": 0.6279434850863422,
"grad_norm": 7.528550263483296,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": 0.6732058525085449,
"logits/rejected": 0.8459190130233765,
"logps/chosen": -283.48883056640625,
"logps/pi_response": -121.66845703125,
"logps/ref_response": -114.0061264038086,
"logps/rejected": -304.6356201171875,
"loss": 0.5426,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.30830463767051697,
"rewards/margins": 0.3349376320838928,
"rewards/rejected": -0.6432422995567322,
"step": 100
},
{
"epoch": 0.6907378335949764,
"grad_norm": 7.109656663434607,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": 0.6394161581993103,
"logits/rejected": 0.9435567855834961,
"logps/chosen": -298.015869140625,
"logps/pi_response": -136.70449829101562,
"logps/ref_response": -125.8144760131836,
"logps/rejected": -383.9209899902344,
"loss": 0.5314,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.33838868141174316,
"rewards/margins": 0.5101521015167236,
"rewards/rejected": -0.8485407829284668,
"step": 110
},
{
"epoch": 0.7535321821036107,
"grad_norm": 7.158165163184529,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 0.5716279745101929,
"logits/rejected": 0.8841035962104797,
"logps/chosen": -320.6312255859375,
"logps/pi_response": -131.89306640625,
"logps/ref_response": -120.58707427978516,
"logps/rejected": -369.5261535644531,
"loss": 0.5541,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.4013099670410156,
"rewards/margins": 0.5214625597000122,
"rewards/rejected": -0.9227724075317383,
"step": 120
},
{
"epoch": 0.8163265306122449,
"grad_norm": 6.942114883986432,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": 0.5996646881103516,
"logits/rejected": 0.8270283937454224,
"logps/chosen": -277.0096130371094,
"logps/pi_response": -136.15554809570312,
"logps/ref_response": -123.1449966430664,
"logps/rejected": -372.5554504394531,
"loss": 0.5271,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.3302404284477234,
"rewards/margins": 0.5928428173065186,
"rewards/rejected": -0.9230831861495972,
"step": 130
},
{
"epoch": 0.8791208791208791,
"grad_norm": 7.301927243964412,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": 0.5675501823425293,
"logits/rejected": 0.8693594932556152,
"logps/chosen": -284.0587158203125,
"logps/pi_response": -131.0335235595703,
"logps/ref_response": -121.63087463378906,
"logps/rejected": -404.29278564453125,
"loss": 0.53,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.3401293456554413,
"rewards/margins": 0.748315155506134,
"rewards/rejected": -1.088444471359253,
"step": 140
},
{
"epoch": 0.9419152276295133,
"grad_norm": 8.46328336009499,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": 0.5060345530509949,
"logits/rejected": 0.8951950073242188,
"logps/chosen": -317.99163818359375,
"logps/pi_response": -145.62229919433594,
"logps/ref_response": -132.86119079589844,
"logps/rejected": -398.7874755859375,
"loss": 0.5193,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.39088305830955505,
"rewards/margins": 0.5848211050033569,
"rewards/rejected": -0.9757040739059448,
"step": 150
},
{
"epoch": 0.9984301412872841,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.5789602717513558,
"train_runtime": 4365.2801,
"train_samples_per_second": 4.668,
"train_steps_per_second": 0.036
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}