File size: 7,018 Bytes
820d27f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.971563981042654,
"eval_steps": 100,
"global_step": 104,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018957345971563982,
"grad_norm": 66.08671668865084,
"learning_rate": 4.545454545454545e-08,
"logits/chosen": 117.53560638427734,
"logits/rejected": 126.8960952758789,
"logps/chosen": -335.40118408203125,
"logps/rejected": -439.16552734375,
"loss": 0.5,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.1895734597156398,
"grad_norm": 69.395273460518,
"learning_rate": 4.545454545454545e-07,
"logits/chosen": 135.0714569091797,
"logits/rejected": 138.4192657470703,
"logps/chosen": -394.4978332519531,
"logps/rejected": -438.8009338378906,
"loss": 0.4962,
"rewards/accuracies": 0.4652777910232544,
"rewards/chosen": 0.07493551820516586,
"rewards/margins": 0.048605356365442276,
"rewards/rejected": 0.026330159977078438,
"step": 10
},
{
"epoch": 0.3791469194312796,
"grad_norm": 29.977088974021598,
"learning_rate": 4.885348141000122e-07,
"logits/chosen": 122.71434020996094,
"logits/rejected": 126.32965087890625,
"logps/chosen": -353.5599060058594,
"logps/rejected": -406.46490478515625,
"loss": 0.411,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.9426037073135376,
"rewards/margins": 0.2662777602672577,
"rewards/rejected": 0.676325798034668,
"step": 20
},
{
"epoch": 0.5687203791469194,
"grad_norm": 25.169253527598716,
"learning_rate": 4.5025027361734613e-07,
"logits/chosen": 144.8006134033203,
"logits/rejected": 138.1434326171875,
"logps/chosen": -380.2307434082031,
"logps/rejected": -436.6331481933594,
"loss": 0.3519,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.6117245554924011,
"rewards/margins": 1.4432713985443115,
"rewards/rejected": -0.8315467834472656,
"step": 30
},
{
"epoch": 0.7582938388625592,
"grad_norm": 24.67006837987384,
"learning_rate": 3.893311157806091e-07,
"logits/chosen": 136.48574829101562,
"logits/rejected": 125.43827819824219,
"logps/chosen": -322.3842468261719,
"logps/rejected": -364.0414733886719,
"loss": 0.3425,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 1.5904960632324219,
"rewards/margins": 1.8988056182861328,
"rewards/rejected": -0.3083093464374542,
"step": 40
},
{
"epoch": 0.9478672985781991,
"grad_norm": 25.00841040201808,
"learning_rate": 3.126631330646801e-07,
"logits/chosen": 154.9099884033203,
"logits/rejected": 158.31307983398438,
"logps/chosen": -383.2764892578125,
"logps/rejected": -484.3291015625,
"loss": 0.3108,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.793367624282837,
"rewards/margins": 2.193502902984619,
"rewards/rejected": -0.4001353681087494,
"step": 50
},
{
"epoch": 1.1374407582938388,
"grad_norm": 20.059631761558116,
"learning_rate": 2.2891223348923882e-07,
"logits/chosen": 145.4669647216797,
"logits/rejected": 149.66366577148438,
"logps/chosen": -360.8850402832031,
"logps/rejected": -458.97705078125,
"loss": 0.2596,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": 1.8560327291488647,
"rewards/margins": 2.9866089820861816,
"rewards/rejected": -1.1305763721466064,
"step": 60
},
{
"epoch": 1.3270142180094786,
"grad_norm": 19.000102691830318,
"learning_rate": 1.4754491880085317e-07,
"logits/chosen": 140.24868774414062,
"logits/rejected": 141.07412719726562,
"logps/chosen": -329.3720703125,
"logps/rejected": -430.41650390625,
"loss": 0.2358,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.8885892629623413,
"rewards/margins": 2.7337210178375244,
"rewards/rejected": -0.8451315760612488,
"step": 70
},
{
"epoch": 1.5165876777251186,
"grad_norm": 20.284917774527994,
"learning_rate": 7.775827023107834e-08,
"logits/chosen": 127.765625,
"logits/rejected": 143.33151245117188,
"logps/chosen": -311.8875427246094,
"logps/rejected": -435.666015625,
"loss": 0.2087,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 2.0881741046905518,
"rewards/margins": 3.2095909118652344,
"rewards/rejected": -1.121416687965393,
"step": 80
},
{
"epoch": 1.7061611374407581,
"grad_norm": 20.77837485504225,
"learning_rate": 2.7440387297912122e-08,
"logits/chosen": 128.07351684570312,
"logits/rejected": 140.38681030273438,
"logps/chosen": -341.97064208984375,
"logps/rejected": -461.2508850097656,
"loss": 0.2054,
"rewards/accuracies": 0.84375,
"rewards/chosen": 2.22301983833313,
"rewards/margins": 3.40147066116333,
"rewards/rejected": -1.1784509420394897,
"step": 90
},
{
"epoch": 1.8957345971563981,
"grad_norm": 19.748881150000962,
"learning_rate": 2.27878296044029e-09,
"logits/chosen": 134.47386169433594,
"logits/rejected": 134.50311279296875,
"logps/chosen": -334.57818603515625,
"logps/rejected": -429.48236083984375,
"loss": 0.1946,
"rewards/accuracies": 0.84375,
"rewards/chosen": 2.3698461055755615,
"rewards/margins": 3.005765438079834,
"rewards/rejected": -0.6359192728996277,
"step": 100
},
{
"epoch": 1.8957345971563981,
"eval_logits/chosen": 113.83429718017578,
"eval_logits/rejected": 108.64144134521484,
"eval_logps/chosen": -333.18048095703125,
"eval_logps/rejected": -365.2297668457031,
"eval_loss": 0.2941707372665405,
"eval_rewards/accuracies": 0.6458333134651184,
"eval_rewards/chosen": 1.5243864059448242,
"eval_rewards/margins": 1.6823266744613647,
"eval_rewards/rejected": -0.15794026851654053,
"eval_runtime": 116.4001,
"eval_samples_per_second": 6.443,
"eval_steps_per_second": 0.206,
"step": 100
},
{
"epoch": 1.971563981042654,
"step": 104,
"total_flos": 0.0,
"train_loss": 0.2986852119748409,
"train_runtime": 2293.3212,
"train_samples_per_second": 5.887,
"train_steps_per_second": 0.045
}
],
"logging_steps": 10,
"max_steps": 104,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|