File size: 7,455 Bytes
e83c27b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 207,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"learning_rate": 1.5714285714285715e-05,
"logits/chosen": -1.823734998703003,
"logits/rejected": -1.96222984790802,
"logps/chosen": -984.5184936523438,
"logps/rejected": -29.066242218017578,
"loss": 0.7963,
"rewards/accuracies": 0.6538461446762085,
"rewards/chosen": 0.2930363118648529,
"rewards/margins": 0.29948848485946655,
"rewards/rejected": -0.00645211897790432,
"step": 13
},
{
"epoch": 0.38,
"learning_rate": 2.9516129032258067e-05,
"logits/chosen": -1.7756704092025757,
"logits/rejected": -1.890375018119812,
"logps/chosen": -1008.671630859375,
"logps/rejected": -30.05452537536621,
"loss": 0.5675,
"rewards/accuracies": 0.6538461446762085,
"rewards/chosen": 0.6647549867630005,
"rewards/margins": 0.7006121277809143,
"rewards/rejected": -0.03585716709494591,
"step": 26
},
{
"epoch": 0.57,
"learning_rate": 2.7419354838709678e-05,
"logits/chosen": -1.837444543838501,
"logits/rejected": -1.8552197217941284,
"logps/chosen": -1084.7537841796875,
"logps/rejected": -30.599714279174805,
"loss": 0.4647,
"rewards/accuracies": 0.7692307829856873,
"rewards/chosen": 0.9935499429702759,
"rewards/margins": 1.160874366760254,
"rewards/rejected": -0.16732460260391235,
"step": 39
},
{
"epoch": 0.75,
"learning_rate": 2.532258064516129e-05,
"logits/chosen": -1.745394229888916,
"logits/rejected": -1.8828259706497192,
"logps/chosen": -1103.2149658203125,
"logps/rejected": -32.83525085449219,
"loss": 0.2193,
"rewards/accuracies": 0.8846153616905212,
"rewards/chosen": 2.401637077331543,
"rewards/margins": 2.809011697769165,
"rewards/rejected": -0.4073745608329773,
"step": 52
},
{
"epoch": 0.94,
"learning_rate": 2.3225806451612902e-05,
"logits/chosen": -1.6953773498535156,
"logits/rejected": -2.03174090385437,
"logps/chosen": -1086.2177734375,
"logps/rejected": -34.20427703857422,
"loss": 0.1084,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.713472843170166,
"rewards/margins": 3.305988073348999,
"rewards/rejected": -0.5925151109695435,
"step": 65
},
{
"epoch": 1.13,
"learning_rate": 2.1129032258064516e-05,
"logits/chosen": -1.7250920534133911,
"logits/rejected": -1.886851191520691,
"logps/chosen": -902.3397827148438,
"logps/rejected": -36.79640579223633,
"loss": 0.0972,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.591458797454834,
"rewards/margins": 3.4001564979553223,
"rewards/rejected": -0.8086973428726196,
"step": 78
},
{
"epoch": 1.32,
"learning_rate": 1.903225806451613e-05,
"logits/chosen": -1.6640688180923462,
"logits/rejected": -1.9599171876907349,
"logps/chosen": -1034.3873291015625,
"logps/rejected": -38.65880584716797,
"loss": 0.0428,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.461390495300293,
"rewards/margins": 4.427910327911377,
"rewards/rejected": -0.9665195941925049,
"step": 91
},
{
"epoch": 1.51,
"learning_rate": 1.6935483870967744e-05,
"logits/chosen": -1.6940295696258545,
"logits/rejected": -1.9844238758087158,
"logps/chosen": -1174.732666015625,
"logps/rejected": -41.705257415771484,
"loss": 0.0235,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.3263399600982666,
"rewards/margins": 4.630356788635254,
"rewards/rejected": -1.3040169477462769,
"step": 104
},
{
"epoch": 1.7,
"learning_rate": 1.4838709677419355e-05,
"logits/chosen": -1.6856719255447388,
"logits/rejected": -1.8793022632598877,
"logps/chosen": -1046.614990234375,
"logps/rejected": -43.94160842895508,
"loss": 0.0238,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.614682912826538,
"rewards/margins": 5.117927074432373,
"rewards/rejected": -1.5032439231872559,
"step": 117
},
{
"epoch": 1.88,
"learning_rate": 1.274193548387097e-05,
"logits/chosen": -1.7377840280532837,
"logits/rejected": -1.8570376634597778,
"logps/chosen": -1106.663330078125,
"logps/rejected": -47.238887786865234,
"loss": 0.0242,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.449557304382324,
"rewards/margins": 5.24083137512207,
"rewards/rejected": -1.7912741899490356,
"step": 130
},
{
"epoch": 2.07,
"learning_rate": 1.0645161290322582e-05,
"logits/chosen": -1.7412984371185303,
"logits/rejected": -1.9490795135498047,
"logps/chosen": -1117.510009765625,
"logps/rejected": -47.68777084350586,
"loss": 0.0227,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.141476631164551,
"rewards/margins": 5.044860363006592,
"rewards/rejected": -1.903383731842041,
"step": 143
},
{
"epoch": 2.26,
"learning_rate": 8.548387096774194e-06,
"logits/chosen": -1.6916511058807373,
"logits/rejected": -1.9522241353988647,
"logps/chosen": -1196.82861328125,
"logps/rejected": -49.64944076538086,
"loss": 0.015,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.8758084774017334,
"rewards/margins": 5.988270282745361,
"rewards/rejected": -2.1124606132507324,
"step": 156
},
{
"epoch": 2.45,
"learning_rate": 6.451612903225806e-06,
"logits/chosen": -1.760750651359558,
"logits/rejected": -1.925395131111145,
"logps/chosen": -701.3285522460938,
"logps/rejected": -50.479835510253906,
"loss": 0.0169,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.707566261291504,
"rewards/margins": 4.906687259674072,
"rewards/rejected": -2.1991212368011475,
"step": 169
},
{
"epoch": 2.64,
"learning_rate": 4.35483870967742e-06,
"logits/chosen": -1.6971558332443237,
"logits/rejected": -1.9400659799575806,
"logps/chosen": -959.2064208984375,
"logps/rejected": -51.388999938964844,
"loss": 0.0085,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.8825502395629883,
"rewards/margins": 6.139618873596191,
"rewards/rejected": -2.2570688724517822,
"step": 182
},
{
"epoch": 2.83,
"learning_rate": 2.2580645161290324e-06,
"logits/chosen": -1.7328979969024658,
"logits/rejected": -2.0168874263763428,
"logps/chosen": -926.796875,
"logps/rejected": -52.40264129638672,
"loss": 0.0097,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.548464059829712,
"rewards/margins": 5.864409446716309,
"rewards/rejected": -2.3159451484680176,
"step": 195
}
],
"logging_steps": 13,
"max_steps": 207,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|