File size: 10,030 Bytes
b820d93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9969788519637462,
  "eval_steps": 100,
  "global_step": 165,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 3.774768176591989,
      "learning_rate": 2.941176470588235e-08,
      "logits/chosen": 0.48741579055786133,
      "logits/rejected": -0.8717803955078125,
      "logps/chosen": -311.44610595703125,
      "logps/rejected": -1042.2933349609375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "grad_norm": 4.065933729000048,
      "learning_rate": 2.941176470588235e-07,
      "logits/chosen": 0.3187962770462036,
      "logits/rejected": -0.46175992488861084,
      "logps/chosen": -526.5966796875,
      "logps/rejected": -899.632568359375,
      "loss": 0.6931,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.0003381037386134267,
      "rewards/margins": 0.00014021807874087244,
      "rewards/rejected": 0.00019788570352829993,
      "step": 10
    },
    {
      "epoch": 0.12,
      "grad_norm": 4.133159908424447,
      "learning_rate": 4.994932636402031e-07,
      "logits/chosen": 0.22923466563224792,
      "logits/rejected": -0.6458711624145508,
      "logps/chosen": -566.1712646484375,
      "logps/rejected": -926.1541137695312,
      "loss": 0.6919,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.0022346877958625555,
      "rewards/margins": 0.0030761375091969967,
      "rewards/rejected": -0.0008414499461650848,
      "step": 20
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.759041431537677,
      "learning_rate": 4.905416503522123e-07,
      "logits/chosen": 0.2407102882862091,
      "logits/rejected": -0.7926596999168396,
      "logps/chosen": -523.1210327148438,
      "logps/rejected": -1028.3199462890625,
      "loss": 0.6855,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 0.010289192199707031,
      "rewards/margins": 0.013627497479319572,
      "rewards/rejected": -0.003338304813951254,
      "step": 30
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.918700608724971,
      "learning_rate": 4.707922373336523e-07,
      "logits/chosen": 0.14743538200855255,
      "logits/rejected": -0.7249930500984192,
      "logps/chosen": -524.011474609375,
      "logps/rejected": -989.4501953125,
      "loss": 0.675,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": 0.016446446999907494,
      "rewards/margins": 0.051999401301145554,
      "rewards/rejected": -0.03555295616388321,
      "step": 40
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.6017852179026626,
      "learning_rate": 4.4113156629677313e-07,
      "logits/chosen": 0.23459818959236145,
      "logits/rejected": -0.6225197911262512,
      "logps/chosen": -481.66455078125,
      "logps/rejected": -867.3211059570312,
      "loss": 0.6639,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": 0.03439263254404068,
      "rewards/margins": 0.06260526925325394,
      "rewards/rejected": -0.02821262739598751,
      "step": 50
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.679527248386035,
      "learning_rate": 4.0289109058972283e-07,
      "logits/chosen": 0.26775047183036804,
      "logits/rejected": -0.49902766942977905,
      "logps/chosen": -516.3983154296875,
      "logps/rejected": -819.7734375,
      "loss": 0.6398,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 0.03639604151248932,
      "rewards/margins": 0.1593528836965561,
      "rewards/rejected": -0.12295685708522797,
      "step": 60
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.945875727521845,
      "learning_rate": 3.577874068920446e-07,
      "logits/chosen": 0.26115402579307556,
      "logits/rejected": -0.6307616233825684,
      "logps/chosen": -534.6641845703125,
      "logps/rejected": -911.5435791015625,
      "loss": 0.6322,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": 0.048932626843452454,
      "rewards/margins": 0.24001319706439972,
      "rewards/rejected": -0.19108060002326965,
      "step": 70
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.7126637404674536,
      "learning_rate": 3.078451980100854e-07,
      "logits/chosen": 0.20563717186450958,
      "logits/rejected": -0.688762903213501,
      "logps/chosen": -493.32684326171875,
      "logps/rejected": -957.6318359375,
      "loss": 0.6237,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 0.060481660068035126,
      "rewards/margins": 0.21118538081645966,
      "rewards/rejected": -0.15070374310016632,
      "step": 80
    },
    {
      "epoch": 0.54,
      "grad_norm": 4.119949298182235,
      "learning_rate": 2.553063458334059e-07,
      "logits/chosen": 0.3919462263584137,
      "logits/rejected": -0.5500736832618713,
      "logps/chosen": -510.05712890625,
      "logps/rejected": -912.9411010742188,
      "loss": 0.6164,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 0.05354578420519829,
      "rewards/margins": 0.2815362215042114,
      "rewards/rejected": -0.22799046337604523,
      "step": 90
    },
    {
      "epoch": 0.6,
      "grad_norm": 4.539444195047728,
      "learning_rate": 2.0252929432814287e-07,
      "logits/chosen": 0.23407666385173798,
      "logits/rejected": -0.6277016401290894,
      "logps/chosen": -514.2950439453125,
      "logps/rejected": -985.7261962890625,
      "loss": 0.6065,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": 0.046576742082834244,
      "rewards/margins": 0.26194584369659424,
      "rewards/rejected": -0.2153691053390503,
      "step": 100
    },
    {
      "epoch": 0.6,
      "eval_logits/chosen": -0.1363597810268402,
      "eval_logits/rejected": -0.3805391788482666,
      "eval_logps/chosen": -523.0221557617188,
      "eval_logps/rejected": -812.6375732421875,
      "eval_loss": 0.6296960115432739,
      "eval_rewards/accuracies": 0.7678571343421936,
      "eval_rewards/chosen": 0.07959667593240738,
      "eval_rewards/margins": 0.17506957054138184,
      "eval_rewards/rejected": -0.09547291696071625,
      "eval_runtime": 22.7695,
      "eval_samples_per_second": 9.135,
      "eval_steps_per_second": 0.307,
      "step": 100
    },
    {
      "epoch": 0.66,
      "grad_norm": 4.166464578639834,
      "learning_rate": 1.5188318011445906e-07,
      "logits/chosen": 0.09842907637357712,
      "logits/rejected": -0.7154465913772583,
      "logps/chosen": -633.3096923828125,
      "logps/rejected": -972.07861328125,
      "loss": 0.5933,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": 0.04117094725370407,
      "rewards/margins": 0.2970955967903137,
      "rewards/rejected": -0.25592464208602905,
      "step": 110
    },
    {
      "epoch": 0.73,
      "grad_norm": 4.767777281679362,
      "learning_rate": 1.0564148305586295e-07,
      "logits/chosen": 0.2290249764919281,
      "logits/rejected": -0.5675751566886902,
      "logps/chosen": -553.788330078125,
      "logps/rejected": -963.9578247070312,
      "loss": 0.5795,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 0.04246982932090759,
      "rewards/margins": 0.5371382832527161,
      "rewards/rejected": -0.4946684241294861,
      "step": 120
    },
    {
      "epoch": 0.79,
      "grad_norm": 4.959401739670467,
      "learning_rate": 6.587997083462196e-08,
      "logits/chosen": 0.1415528953075409,
      "logits/rejected": -0.6273466348648071,
      "logps/chosen": -579.4324951171875,
      "logps/rejected": -927.8792114257812,
      "loss": 0.5587,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": 0.02018633857369423,
      "rewards/margins": 0.7045117616653442,
      "rewards/rejected": -0.6843255162239075,
      "step": 130
    },
    {
      "epoch": 0.85,
      "grad_norm": 4.589724744119317,
      "learning_rate": 3.438351873250492e-08,
      "logits/chosen": 0.2175011932849884,
      "logits/rejected": -0.5643750429153442,
      "logps/chosen": -543.2364501953125,
      "logps/rejected": -1040.180908203125,
      "loss": 0.5653,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": 0.03240719065070152,
      "rewards/margins": 0.5543904900550842,
      "rewards/rejected": -0.52198326587677,
      "step": 140
    },
    {
      "epoch": 0.91,
      "grad_norm": 5.293978243611277,
      "learning_rate": 1.256598743236703e-08,
      "logits/chosen": 0.2741110026836395,
      "logits/rejected": -0.6036696434020996,
      "logps/chosen": -437.42901611328125,
      "logps/rejected": -982.3721923828125,
      "loss": 0.5555,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": 0.033161625266075134,
      "rewards/margins": 0.5011934638023376,
      "rewards/rejected": -0.46803179383277893,
      "step": 150
    },
    {
      "epoch": 0.97,
      "grad_norm": 4.856259961602652,
      "learning_rate": 1.406755487774386e-09,
      "logits/chosen": 0.14368140697479248,
      "logits/rejected": -0.6074076294898987,
      "logps/chosen": -525.0721435546875,
      "logps/rejected": -952.7180786132812,
      "loss": 0.5519,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": 0.04387623816728592,
      "rewards/margins": 0.42462554574012756,
      "rewards/rejected": -0.38074928522109985,
      "step": 160
    },
    {
      "epoch": 1.0,
      "step": 165,
      "total_flos": 0.0,
      "train_loss": 0.6189163742643414,
      "train_runtime": 2381.9724,
      "train_samples_per_second": 4.446,
      "train_steps_per_second": 0.069
    }
  ],
  "logging_steps": 10,
  "max_steps": 165,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}