File size: 8,189 Bytes
d6744fb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9805825242718447,
"eval_steps": 25,
"global_step": 102,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.09090909090909e-09,
"logits/generated": -2.7517285346984863,
"logits/real": -2.7709789276123047,
"logps/generated": -844.9539794921875,
"logps/real": -335.89251708984375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.19,
"learning_rate": 9.09090909090909e-08,
"logits/generated": -2.6863832473754883,
"logits/real": -2.7313873767852783,
"logps/generated": -753.656005859375,
"logps/real": -260.1191711425781,
"loss": 0.5813,
"rewards/accuracies": 0.7430555820465088,
"rewards/generated": -0.3012603223323822,
"rewards/margins": 0.32114675641059875,
"rewards/real": 0.019886476919054985,
"step": 10
},
{
"epoch": 0.39,
"learning_rate": 9.010989010989011e-08,
"logits/generated": -2.4782943725585938,
"logits/real": -2.5394017696380615,
"logps/generated": -1096.2259521484375,
"logps/real": -633.4000854492188,
"loss": 5.8769,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -33.22383499145508,
"rewards/margins": -1.7550216913223267,
"rewards/real": -34.978858947753906,
"step": 20
},
{
"epoch": 0.49,
"eval_logits/generated": -2.7939765453338623,
"eval_logits/real": -2.838216781616211,
"eval_logps/generated": -719.6648559570312,
"eval_logps/real": -274.2817077636719,
"eval_loss": 0.18901990354061127,
"eval_rewards/accuracies": 0.9375,
"eval_rewards/generated": -2.9832763671875,
"eval_rewards/margins": 2.815262794494629,
"eval_rewards/real": -0.16801361739635468,
"eval_runtime": 56.889,
"eval_samples_per_second": 6.469,
"eval_steps_per_second": 0.211,
"step": 25
},
{
"epoch": 0.58,
"learning_rate": 7.912087912087911e-08,
"logits/generated": -2.823836088180542,
"logits/real": -2.8936166763305664,
"logps/generated": -856.4396362304688,
"logps/real": -293.3896179199219,
"loss": 0.1538,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.060887336730957,
"rewards/margins": 2.894258975982666,
"rewards/real": -0.1666283905506134,
"step": 30
},
{
"epoch": 0.78,
"learning_rate": 6.813186813186813e-08,
"logits/generated": -2.8231825828552246,
"logits/real": -2.8379809856414795,
"logps/generated": -769.75048828125,
"logps/real": -256.47027587890625,
"loss": 0.1241,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -3.725442409515381,
"rewards/margins": 3.5178775787353516,
"rewards/real": -0.20756463706493378,
"step": 40
},
{
"epoch": 0.97,
"learning_rate": 5.714285714285714e-08,
"logits/generated": -2.8316774368286133,
"logits/real": -2.8536953926086426,
"logps/generated": -803.0462646484375,
"logps/real": -274.77337646484375,
"loss": 0.1202,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -3.983602523803711,
"rewards/margins": 3.5648624897003174,
"rewards/real": -0.4187401831150055,
"step": 50
},
{
"epoch": 0.97,
"eval_logits/generated": -2.83947491645813,
"eval_logits/real": -2.8439435958862305,
"eval_logps/generated": -732.087890625,
"eval_logps/real": -276.7652282714844,
"eval_loss": 0.14404349029064178,
"eval_rewards/accuracies": 0.9479166865348816,
"eval_rewards/generated": -4.225581645965576,
"eval_rewards/margins": 3.809215784072876,
"eval_rewards/real": -0.41636598110198975,
"eval_runtime": 55.8686,
"eval_samples_per_second": 6.587,
"eval_steps_per_second": 0.215,
"step": 50
},
{
"epoch": 1.17,
"learning_rate": 4.615384615384615e-08,
"logits/generated": -2.8372151851654053,
"logits/real": -2.857466697692871,
"logps/generated": -920.998046875,
"logps/real": -288.32086181640625,
"loss": 0.0744,
"rewards/accuracies": 1.0,
"rewards/generated": -4.382575035095215,
"rewards/margins": 4.1766462326049805,
"rewards/real": -0.20592932403087616,
"step": 60
},
{
"epoch": 1.36,
"learning_rate": 3.516483516483517e-08,
"logits/generated": -2.832484483718872,
"logits/real": -2.8351359367370605,
"logps/generated": -778.0480346679688,
"logps/real": -260.9442443847656,
"loss": 0.0754,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.6376495361328125,
"rewards/margins": 4.451912879943848,
"rewards/real": -0.18573713302612305,
"step": 70
},
{
"epoch": 1.46,
"eval_logits/generated": -2.841118574142456,
"eval_logits/real": -2.838819742202759,
"eval_logps/generated": -737.3972778320312,
"eval_logps/real": -278.0699768066406,
"eval_loss": 0.1297575831413269,
"eval_rewards/accuracies": 0.9583333134651184,
"eval_rewards/generated": -4.7565155029296875,
"eval_rewards/margins": 4.2096757888793945,
"eval_rewards/real": -0.546840488910675,
"eval_runtime": 57.5597,
"eval_samples_per_second": 6.393,
"eval_steps_per_second": 0.208,
"step": 75
},
{
"epoch": 1.55,
"learning_rate": 2.4175824175824175e-08,
"logits/generated": -2.8195009231567383,
"logits/real": -2.84141206741333,
"logps/generated": -862.6012573242188,
"logps/real": -276.63531494140625,
"loss": 0.0666,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -5.086130619049072,
"rewards/margins": 4.704850196838379,
"rewards/real": -0.3812801241874695,
"step": 80
},
{
"epoch": 1.75,
"learning_rate": 1.3186813186813187e-08,
"logits/generated": -2.8254876136779785,
"logits/real": -2.823962450027466,
"logps/generated": -835.1746215820312,
"logps/real": -272.4798278808594,
"loss": 0.0572,
"rewards/accuracies": 1.0,
"rewards/generated": -5.038660526275635,
"rewards/margins": 4.638853073120117,
"rewards/real": -0.39980727434158325,
"step": 90
},
{
"epoch": 1.94,
"learning_rate": 2.197802197802198e-09,
"logits/generated": -2.8575081825256348,
"logits/real": -2.859795093536377,
"logps/generated": -838.92919921875,
"logps/real": -267.28704833984375,
"loss": 0.0621,
"rewards/accuracies": 0.9937499761581421,
"rewards/generated": -5.040513038635254,
"rewards/margins": 4.70792818069458,
"rewards/real": -0.33258455991744995,
"step": 100
},
{
"epoch": 1.94,
"eval_logits/generated": -2.84299635887146,
"eval_logits/real": -2.8375051021575928,
"eval_logps/generated": -739.3701171875,
"eval_logps/real": -278.2850646972656,
"eval_loss": 0.12532015144824982,
"eval_rewards/accuracies": 0.9479166865348816,
"eval_rewards/generated": -4.953795433044434,
"eval_rewards/margins": 4.385446071624756,
"eval_rewards/real": -0.5683497786521912,
"eval_runtime": 56.3549,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 0.213,
"step": 100
},
{
"epoch": 1.98,
"step": 102,
"total_flos": 0.0,
"train_loss": 0.7072318076503044,
"train_runtime": 2304.549,
"train_samples_per_second": 2.86,
"train_steps_per_second": 0.044
}
],
"logging_steps": 10,
"max_steps": 102,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
|