{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9921671018276762,
"eval_steps": 500,
"global_step": 95,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 6.531942491552821,
"learning_rate": 5e-08,
"logits/chosen": -2.851747512817383,
"logits/rejected": -2.833996534347534,
"logps/chosen": -165.70089721679688,
"logps/rejected": -198.857666015625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05,
"grad_norm": 5.930803989300868,
"learning_rate": 2.5e-07,
"logits/chosen": -2.770416259765625,
"logits/rejected": -2.7731680870056152,
"logps/chosen": -171.3281707763672,
"logps/rejected": -172.58348083496094,
"loss": 0.693,
"rewards/accuracies": 0.375,
"rewards/chosen": 0.00034834028338082135,
"rewards/margins": 4.263037408236414e-05,
"rewards/rejected": 0.0003057100111618638,
"step": 5
},
{
"epoch": 0.1,
"grad_norm": 7.205939520530408,
"learning_rate": 5e-07,
"logits/chosen": -2.785672664642334,
"logits/rejected": -2.7945070266723633,
"logps/chosen": -189.79400634765625,
"logps/rejected": -194.38011169433594,
"loss": 0.6923,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.005779535509645939,
"rewards/margins": 0.0015561816981062293,
"rewards/rejected": 0.004223353695124388,
"step": 10
},
{
"epoch": 0.16,
"grad_norm": 7.119689881451758,
"learning_rate": 4.957432749209755e-07,
"logits/chosen": -2.841862678527832,
"logits/rejected": -2.8522396087646484,
"logps/chosen": -196.4453582763672,
"logps/rejected": -186.3593292236328,
"loss": 0.6891,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.0351785309612751,
"rewards/margins": 0.009719189256429672,
"rewards/rejected": 0.025459343567490578,
"step": 15
},
{
"epoch": 0.21,
"grad_norm": 7.025742204681022,
"learning_rate": 4.83118057351089e-07,
"logits/chosen": -2.8577423095703125,
"logits/rejected": -2.8679168224334717,
"logps/chosen": -163.30587768554688,
"logps/rejected": -176.16122436523438,
"loss": 0.6822,
"rewards/accuracies": 0.65625,
"rewards/chosen": 0.054369617253541946,
"rewards/margins": 0.01927168108522892,
"rewards/rejected": 0.035097938030958176,
"step": 20
},
{
"epoch": 0.26,
"grad_norm": 7.047833772227819,
"learning_rate": 4.6255428393240354e-07,
"logits/chosen": -2.8176944255828857,
"logits/rejected": -2.8154852390289307,
"logps/chosen": -127.52900695800781,
"logps/rejected": -149.99598693847656,
"loss": 0.6734,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.039430197328329086,
"rewards/margins": 0.044888969510793686,
"rewards/rejected": -0.005458767991513014,
"step": 25
},
{
"epoch": 0.31,
"grad_norm": 7.4255717276037405,
"learning_rate": 4.3475222930516473e-07,
"logits/chosen": -2.781858444213867,
"logits/rejected": -2.7814831733703613,
"logps/chosen": -161.177734375,
"logps/rejected": -173.82421875,
"loss": 0.665,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.01698228344321251,
"rewards/margins": 0.0684308260679245,
"rewards/rejected": -0.08541311323642731,
"step": 30
},
{
"epoch": 0.37,
"grad_norm": 7.891881929971765,
"learning_rate": 4.006586590948141e-07,
"logits/chosen": -2.848252296447754,
"logits/rejected": -2.8431050777435303,
"logps/chosen": -192.15963745117188,
"logps/rejected": -209.07540893554688,
"loss": 0.6544,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.008883295580744743,
"rewards/margins": 0.0794510543346405,
"rewards/rejected": -0.0883343443274498,
"step": 35
},
{
"epoch": 0.42,
"grad_norm": 9.209834953181781,
"learning_rate": 3.614345889441346e-07,
"logits/chosen": -2.7681477069854736,
"logits/rejected": -2.78022837638855,
"logps/chosen": -135.9792022705078,
"logps/rejected": -164.3667449951172,
"loss": 0.6465,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.031201759353280067,
"rewards/margins": 0.13447019457817078,
"rewards/rejected": -0.16567197442054749,
"step": 40
},
{
"epoch": 0.47,
"grad_norm": 13.835886251568184,
"learning_rate": 3.184157475180207e-07,
"logits/chosen": -2.7284975051879883,
"logits/rejected": -2.7436182498931885,
"logps/chosen": -205.8560028076172,
"logps/rejected": -212.56710815429688,
"loss": 0.6496,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.17208269238471985,
"rewards/margins": 0.13685402274131775,
"rewards/rejected": -0.30893674492836,
"step": 45
},
{
"epoch": 0.52,
"grad_norm": 10.309186722273289,
"learning_rate": 2.730670898658255e-07,
"logits/chosen": -2.7203848361968994,
"logits/rejected": -2.7220139503479004,
"logps/chosen": -183.94479370117188,
"logps/rejected": -218.1922149658203,
"loss": 0.6248,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.09618374705314636,
"rewards/margins": 0.18668127059936523,
"rewards/rejected": -0.2828650176525116,
"step": 50
},
{
"epoch": 0.57,
"grad_norm": 12.04836501966109,
"learning_rate": 2.2693291013417452e-07,
"logits/chosen": -2.6110920906066895,
"logits/rejected": -2.6190452575683594,
"logps/chosen": -151.388916015625,
"logps/rejected": -174.0006561279297,
"loss": 0.6317,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.032760851085186005,
"rewards/margins": 0.17945989966392517,
"rewards/rejected": -0.2122207134962082,
"step": 55
},
{
"epoch": 0.63,
"grad_norm": 14.818475765214615,
"learning_rate": 1.8158425248197928e-07,
"logits/chosen": -2.69221568107605,
"logits/rejected": -2.689034938812256,
"logps/chosen": -181.30128479003906,
"logps/rejected": -231.5193634033203,
"loss": 0.6122,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.09343220293521881,
"rewards/margins": 0.2633873522281647,
"rewards/rejected": -0.3568195104598999,
"step": 60
},
{
"epoch": 0.68,
"grad_norm": 15.265728023102268,
"learning_rate": 1.3856541105586545e-07,
"logits/chosen": -2.7168681621551514,
"logits/rejected": -2.7309060096740723,
"logps/chosen": -185.16700744628906,
"logps/rejected": -220.42764282226562,
"loss": 0.6045,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.23089858889579773,
"rewards/margins": 0.27521029114723206,
"rewards/rejected": -0.5061088800430298,
"step": 65
},
{
"epoch": 0.73,
"grad_norm": 13.52741638941588,
"learning_rate": 9.934134090518592e-08,
"logits/chosen": -2.6834919452667236,
"logits/rejected": -2.6923632621765137,
"logps/chosen": -200.9665069580078,
"logps/rejected": -217.9497528076172,
"loss": 0.6094,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.23829719424247742,
"rewards/margins": 0.2035256326198578,
"rewards/rejected": -0.4418228268623352,
"step": 70
},
{
"epoch": 0.78,
"grad_norm": 14.871873879280589,
"learning_rate": 6.524777069483525e-08,
"logits/chosen": -2.6725871562957764,
"logits/rejected": -2.6699538230895996,
"logps/chosen": -185.2981719970703,
"logps/rejected": -229.42092895507812,
"loss": 0.5985,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.13438589870929718,
"rewards/margins": 0.32371044158935547,
"rewards/rejected": -0.45809632539749146,
"step": 75
},
{
"epoch": 0.84,
"grad_norm": 12.313204564006284,
"learning_rate": 3.74457160675965e-08,
"logits/chosen": -2.6488523483276367,
"logits/rejected": -2.6512537002563477,
"logps/chosen": -177.8891143798828,
"logps/rejected": -211.4371795654297,
"loss": 0.6019,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.09899892657995224,
"rewards/margins": 0.26206719875335693,
"rewards/rejected": -0.36106616258621216,
"step": 80
},
{
"epoch": 0.89,
"grad_norm": 17.242389025181602,
"learning_rate": 1.6881942648911074e-08,
"logits/chosen": -2.6852784156799316,
"logits/rejected": -2.6899216175079346,
"logps/chosen": -171.39414978027344,
"logps/rejected": -207.66738891601562,
"loss": 0.6214,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.11210503429174423,
"rewards/margins": 0.2755950093269348,
"rewards/rejected": -0.38770005106925964,
"step": 85
},
{
"epoch": 0.94,
"grad_norm": 13.932688124952723,
"learning_rate": 4.256725079024553e-09,
"logits/chosen": -2.6324477195739746,
"logits/rejected": -2.6469483375549316,
"logps/chosen": -181.08218383789062,
"logps/rejected": -215.79953002929688,
"loss": 0.6081,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.12142710387706757,
"rewards/margins": 0.2807455062866211,
"rewards/rejected": -0.40217262506484985,
"step": 90
},
{
"epoch": 0.99,
"grad_norm": 15.247505163019246,
"learning_rate": 0.0,
"logits/chosen": -2.682211399078369,
"logits/rejected": -2.697298765182495,
"logps/chosen": -198.21182250976562,
"logps/rejected": -223.2611541748047,
"loss": 0.6054,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.12603162229061127,
"rewards/margins": 0.2787989377975464,
"rewards/rejected": -0.40483060479164124,
"step": 95
},
{
"epoch": 0.99,
"step": 95,
"total_flos": 0.0,
"train_loss": 0.6401761331056294,
"train_runtime": 2555.4095,
"train_samples_per_second": 4.785,
"train_steps_per_second": 0.037
}
],
"logging_steps": 5,
"max_steps": 95,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}