File size: 7,554 Bytes
5de8f9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.984,
"eval_steps": 100,
"global_step": 124,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 3.846153846153847e-07,
"logits/chosen": 0.02903342992067337,
"logits/rejected": 0.16799500584602356,
"logps/chosen": -204.7097930908203,
"logps/rejected": -186.28207397460938,
"loss": 0.0102,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.16,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": 0.11909348517656326,
"logits/rejected": 0.1480591893196106,
"logps/chosen": -174.38600158691406,
"logps/rejected": -139.39389038085938,
"loss": 0.0103,
"rewards/accuracies": 0.3541666567325592,
"rewards/chosen": 0.0004580159147735685,
"rewards/margins": -5.351095023797825e-05,
"rewards/rejected": 0.0005115267704240978,
"step": 10
},
{
"epoch": 0.32,
"learning_rate": 4.951096619903317e-06,
"logits/chosen": 0.1915196031332016,
"logits/rejected": 0.26785004138946533,
"logps/chosen": -186.1024169921875,
"logps/rejected": -150.16329956054688,
"loss": 0.0103,
"rewards/accuracies": 0.40625,
"rewards/chosen": 8.462425466859713e-05,
"rewards/margins": 0.00011474495113361627,
"rewards/rejected": -3.012050910911057e-05,
"step": 20
},
{
"epoch": 0.48,
"learning_rate": 4.716164218065246e-06,
"logits/chosen": 0.09372388571500778,
"logits/rejected": 0.09187857806682587,
"logps/chosen": -189.90634155273438,
"logps/rejected": -176.62911987304688,
"loss": 0.0102,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": 0.0012555012945085764,
"rewards/margins": 0.0008926725131459534,
"rewards/rejected": 0.0003628287522587925,
"step": 30
},
{
"epoch": 0.64,
"learning_rate": 4.3048902348863116e-06,
"logits/chosen": 0.21573364734649658,
"logits/rejected": 0.1305653154850006,
"logps/chosen": -178.8751220703125,
"logps/rejected": -151.7847137451172,
"loss": 0.0106,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": 0.0010144033003598452,
"rewards/margins": -0.0007725629839114845,
"rewards/rejected": 0.0017869662260636687,
"step": 40
},
{
"epoch": 0.8,
"learning_rate": 3.7500000000000005e-06,
"logits/chosen": 0.10782618820667267,
"logits/rejected": 0.1629345715045929,
"logps/chosen": -185.59564208984375,
"logps/rejected": -174.7571563720703,
"loss": 0.0106,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.00029209800413809717,
"rewards/margins": -0.0012864455347880721,
"rewards/rejected": 0.0009943475015461445,
"step": 50
},
{
"epoch": 0.96,
"learning_rate": 3.0956464785579125e-06,
"logits/chosen": 0.19225239753723145,
"logits/rejected": 0.18530502915382385,
"logps/chosen": -194.72091674804688,
"logps/rejected": -159.44725036621094,
"loss": 0.0101,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": 0.002120513701811433,
"rewards/margins": 0.0013710735365748405,
"rewards/rejected": 0.0007494401070289314,
"step": 60
},
{
"epoch": 1.12,
"learning_rate": 2.39389699200963e-06,
"logits/chosen": 0.18958896398544312,
"logits/rejected": 0.14928244054317474,
"logps/chosen": -196.82078552246094,
"logps/rejected": -172.227294921875,
"loss": 0.0107,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.00045417825458571315,
"rewards/margins": -0.0015611432027071714,
"rewards/rejected": 0.0011069647734984756,
"step": 70
},
{
"epoch": 1.28,
"learning_rate": 1.700590188571887e-06,
"logits/chosen": 0.15342679619789124,
"logits/rejected": 0.13346508145332336,
"logps/chosen": -176.46441650390625,
"logps/rejected": -149.63717651367188,
"loss": 0.0102,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": 0.0011017677607014775,
"rewards/margins": 0.0006318273372016847,
"rewards/rejected": 0.000469940627226606,
"step": 80
},
{
"epoch": 1.44,
"learning_rate": 1.0708929268538034e-06,
"logits/chosen": 0.19051842391490936,
"logits/rejected": 0.15732800960540771,
"logps/chosen": -183.8511199951172,
"logps/rejected": -157.0377197265625,
"loss": 0.0104,
"rewards/accuracies": 0.34375,
"rewards/chosen": 0.000780799426138401,
"rewards/margins": -0.000656499934848398,
"rewards/rejected": 0.0014372995356097817,
"step": 90
},
{
"epoch": 1.6,
"learning_rate": 5.549106142039018e-07,
"logits/chosen": 0.16213683784008026,
"logits/rejected": 0.04842492565512657,
"logps/chosen": -177.85955810546875,
"logps/rejected": -152.51058959960938,
"loss": 0.0102,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": 0.0025749087799340487,
"rewards/margins": 0.0006177256000228226,
"rewards/rejected": 0.001957183238118887,
"step": 100
},
{
"epoch": 1.6,
"eval_logits/chosen": -0.007027674000710249,
"eval_logits/rejected": 0.09070703387260437,
"eval_logps/chosen": -306.4146728515625,
"eval_logps/rejected": -278.6007995605469,
"eval_loss": 0.010791419073939323,
"eval_rewards/accuracies": 0.4830000102519989,
"eval_rewards/chosen": 0.001202387735247612,
"eval_rewards/margins": -0.00015190700651146472,
"eval_rewards/rejected": 0.001354294829070568,
"eval_runtime": 432.9902,
"eval_samples_per_second": 4.619,
"eval_steps_per_second": 1.155,
"step": 100
},
{
"epoch": 1.76,
"learning_rate": 1.937002879188285e-07,
"logits/chosen": 0.22388437390327454,
"logits/rejected": 0.19077368080615997,
"logps/chosen": -205.4290008544922,
"logps/rejected": -172.20669555664062,
"loss": 0.0104,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": 0.00022258120588958263,
"rewards/margins": -0.00020028270955663174,
"rewards/rejected": 0.0004228639299981296,
"step": 110
},
{
"epoch": 1.92,
"learning_rate": 1.6003680950742728e-08,
"logits/chosen": 0.17824237048625946,
"logits/rejected": 0.234628364443779,
"logps/chosen": -184.6617431640625,
"logps/rejected": -160.73251342773438,
"loss": 0.0104,
"rewards/accuracies": 0.40625,
"rewards/chosen": 0.0013891400303691626,
"rewards/margins": 9.483665053267032e-05,
"rewards/rejected": 0.0012943033361807466,
"step": 120
},
{
"epoch": 1.98,
"step": 124,
"total_flos": 0.0,
"train_loss": 0.010348274312432735,
"train_runtime": 1408.9952,
"train_samples_per_second": 1.419,
"train_steps_per_second": 0.088
}
],
"logging_steps": 10,
"max_steps": 124,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|