RikkiXu committed 5dda2ba (parent: ad65f50)

Model save
README.md CHANGED
@@ -32,7 +32,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 1e-08
+- learning_rate: 3e-07
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
-- Transformers 4.38.2
+- Transformers 4.39.3
 - Pytorch 2.1.2+cu118
 - Datasets 2.16.1
 - Tokenizers 0.15.2
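For orientation, the updated hyperparameters above map onto a standard `transformers.TrainingArguments` setup roughly as follows. This is a minimal illustrative sketch, not the repository's actual training script; `output_dir` is a placeholder and all omitted arguments keep their defaults.

```python
# Sketch only: README hyperparameters (post-commit) expressed as TrainingArguments.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./out",              # placeholder
    learning_rate=3e-07,             # raised from 1e-08 in this commit
    per_device_train_batch_size=8,   # "train_batch_size: 8"
    per_device_eval_batch_size=8,    # "eval_batch_size: 8"
    seed=42,
    num_train_epochs=1,
)
```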
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.6950656716028849,
-    "train_runtime": 4446.5407,
-    "train_samples": 38445,
-    "train_samples_per_second": 8.646,
-    "train_steps_per_second": 0.034
+    "train_loss": 1.3861158726707337,
+    "train_runtime": 5367.071,
+    "train_samples": 48530,
+    "train_samples_per_second": 9.042,
+    "train_steps_per_second": 0.035
 }
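As a quick sanity check (values copied from the new all_results.json and trainer_state.json, assuming the metrics are defined the usual way), the throughput figures are internally consistent: 48530 samples / 5367.071 s ≈ 9.04 samples per second, and 189 optimizer steps / 5367.071 s ≈ 0.035 steps per second.

```python
# Consistency check on the updated metrics (values from all_results.json / trainer_state.json).
train_samples, train_runtime, steps = 48530, 5367.071, 189
print(train_samples / train_runtime)  # ~9.042 -> "train_samples_per_second"
print(steps / train_runtime)          # ~0.035 -> "train_steps_per_second"
```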
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 32000,
-  "transformers_version": "4.38.2"
+  "transformers_version": "4.39.3"
 }
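Only the recorded transformers_version changes here; the special-token ids are untouched. For reference, this file is what `GenerationConfig` carries at generation time. A small sketch (the repo id below is a placeholder):

```python
# Sketch: the values above as a GenerationConfig (repo id is a placeholder).
from transformers import GenerationConfig

gen_cfg = GenerationConfig(bos_token_id=1, eos_token_id=32000)
# Equivalently, once the checkpoint is published:
# gen_cfg = GenerationConfig.from_pretrained("RikkiXu/<model-name>")
print(gen_cfg.eos_token_id)  # 32000
```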
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:babd17a2e7814d83995456444b46a193dadee60ab7f7c1b37c860038f0952005
+oid sha256:8b368d145612a34ed994ad48082d9b257146213f25c1ebd309358d2894fc8166
 size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a43edf0b9ec78db80fffc7ca6e3f6c0a1fe224c9898b090a4e4a3b7f5961b44
+oid sha256:9b56652344c4b8a4db7f423c746179279fd145c51c280f7981952f11f7b406de
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f79f70bd9e954c826d33d1d7340ad20c07ac0dc88ee52022ef706382edcab32
+oid sha256:76a444457bdcf58fff4f2801fa8105c617087ad5220718bb0eb8c51ce71f1022
 size 4540532728
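The three shard entries above are Git LFS pointer files, not the weights themselves: each records only the SHA-256 and byte size of the real .safetensors shard, which is why only the hashes change while the shard sizes stay identical. After fetching the actual files (e.g. with `git lfs pull`), the hashes can be re-checked locally; a small sketch, using the first shard's filename from this repo:

```python
# Sketch: recompute the SHA-256 of a downloaded shard and compare it with the
# "oid sha256:..." value in its LFS pointer (the file must already be pulled locally).
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            h.update(block)
    return h.hexdigest()

print(sha256_of("model-00001-of-00003.safetensors"))
# Expected for this commit:
# 8b368d145612a34ed994ad48082d9b257146213f25c1ebd309358d2894fc8166
```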
runs/Jun14_02-05-29_n136-100-194/events.out.tfevents.1718302064.n136-100-194.643461.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60cbc7fd1e3658854d1f653d4a0e4675dbfaeb7af36c3221d341e6ce9a941072
-size 12339
+oid sha256:51b1aeb1055493e96a48a15605d2fe768213e463fe5c236032745b917039c655
+size 18173
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.6950656716028849,
-    "train_runtime": 4446.5407,
-    "train_samples": 38445,
-    "train_samples_per_second": 8.646,
-    "train_steps_per_second": 0.034
+    "train_loss": 1.3861158726707337,
+    "train_runtime": 5367.071,
+    "train_samples": 48530,
+    "train_samples_per_second": 9.042,
+    "train_steps_per_second": 0.035
 }
trainer_state.json CHANGED
@@ -1,22 +1,22 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9983361064891847,
+  "epoch": 0.9960474308300395,
   "eval_steps": 500,
-  "global_step": 150,
+  "global_step": 189,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.01,
-      "grad_norm": 1142.1729750161728,
-      "learning_rate": 6.666666666666666e-10,
-      "logits/chosen": -4.106247425079346,
-      "logits/rejected": -4.200438499450684,
-      "logps/chosen": -382.81439208984375,
-      "logps/rejected": -357.65960693359375,
-      "loss": 0.685,
+      "grad_norm": 41.432535799844544,
+      "learning_rate": 1.5789473684210525e-08,
+      "logits/chosen": -2.270329713821411,
+      "logits/rejected": -2.2495758533477783,
+      "logps/chosen": -1.0721262693405151,
+      "logps/rejected": -0.9967758059501648,
+      "loss": 1.8459,
       "rewards/accuracies": 0.0,
       "rewards/chosen": 0.0,
       "rewards/margins": 0.0,
@@ -24,242 +24,287 @@
       "step": 1
     },
     {
-      "epoch": 0.07,
-      "grad_norm": 1162.0154294843069,
-      "learning_rate": 6.666666666666667e-09,
-      "logits/chosen": -4.2175726890563965,
-      "logits/rejected": -4.321321487426758,
-      "logps/chosen": -334.61383056640625,
-      "logps/rejected": -313.4597473144531,
-      "loss": 0.7288,
-      "rewards/accuracies": 0.4756944477558136,
-      "rewards/chosen": -0.016012493520975113,
-      "rewards/margins": 0.006440857890993357,
-      "rewards/rejected": -0.022453350946307182,
+      "epoch": 0.05,
+      "grad_norm": 38.0481408472616,
+      "learning_rate": 1.5789473684210525e-07,
+      "logits/chosen": -2.3005785942077637,
+      "logits/rejected": -2.236785888671875,
+      "logps/chosen": -1.0405129194259644,
+      "logps/rejected": -1.0305094718933105,
+      "loss": 1.8036,
+      "rewards/accuracies": 0.4618055522441864,
+      "rewards/chosen": -0.0005758580518886447,
+      "rewards/margins": -0.0005768582923337817,
+      "rewards/rejected": 1.000240445137024e-06,
       "step": 10
     },
     {
-      "epoch": 0.13,
-      "grad_norm": 1254.20259356522,
-      "learning_rate": 9.966191788709716e-09,
-      "logits/chosen": -4.266871452331543,
-      "logits/rejected": -4.419375896453857,
-      "logps/chosen": -313.91156005859375,
-      "logps/rejected": -288.8208923339844,
-      "loss": 0.7239,
-      "rewards/accuracies": 0.512499988079071,
-      "rewards/chosen": 0.0004551798047032207,
-      "rewards/margins": 0.007867029868066311,
-      "rewards/rejected": -0.0074118501506745815,
+      "epoch": 0.11,
+      "grad_norm": 39.11632822940491,
+      "learning_rate": 2.9997438756870786e-07,
+      "logits/chosen": -2.323885917663574,
+      "logits/rejected": -2.296593189239502,
+      "logps/chosen": -1.053823709487915,
+      "logps/rejected": -1.0721189975738525,
+      "loss": 1.7888,
+      "rewards/accuracies": 0.690625011920929,
+      "rewards/chosen": -0.0061118630692362785,
+      "rewards/margins": 0.008750928565859795,
+      "rewards/rejected": -0.014862793497741222,
       "step": 20
     },
     {
-      "epoch": 0.2,
-      "grad_norm": 1168.2123312853032,
-      "learning_rate": 9.698463103929542e-09,
-      "logits/chosen": -4.302128791809082,
-      "logits/rejected": -4.365870475769043,
-      "logps/chosen": -308.2377014160156,
-      "logps/rejected": -285.6295471191406,
-      "loss": 0.7344,
-      "rewards/accuracies": 0.5249999761581421,
-      "rewards/chosen": 0.019400831311941147,
-      "rewards/margins": 0.019786948338150978,
-      "rewards/rejected": -0.0003861159202642739,
+      "epoch": 0.16,
+      "grad_norm": 34.61655659817795,
+      "learning_rate": 2.9691146514020486e-07,
+      "logits/chosen": -2.4537835121154785,
+      "logits/rejected": -2.406308889389038,
+      "logps/chosen": -1.035614013671875,
+      "logps/rejected": -1.090877890586853,
+      "loss": 1.7604,
+      "rewards/accuracies": 0.778124988079071,
+      "rewards/chosen": -0.04775990918278694,
+      "rewards/margins": 0.049577243626117706,
+      "rewards/rejected": -0.09733714908361435,
       "step": 30
     },
     {
-      "epoch": 0.27,
-      "grad_norm": 1099.047513823573,
-      "learning_rate": 9.177439057064682e-09,
-      "logits/chosen": -4.187483310699463,
-      "logits/rejected": -4.270766735076904,
-      "logps/chosen": -332.38433837890625,
-      "logps/rejected": -307.4942321777344,
-      "loss": 0.7115,
-      "rewards/accuracies": 0.5406249761581421,
-      "rewards/chosen": 0.0541040301322937,
-      "rewards/margins": 0.07320869714021683,
-      "rewards/rejected": -0.01910465955734253,
+      "epoch": 0.21,
+      "grad_norm": 39.56505728726439,
+      "learning_rate": 2.888456489672334e-07,
+      "logits/chosen": -2.672393321990967,
+      "logits/rejected": -2.6184394359588623,
+      "logps/chosen": -1.121576189994812,
+      "logps/rejected": -1.1622512340545654,
+      "loss": 1.7281,
+      "rewards/accuracies": 0.746874988079071,
+      "rewards/chosen": -0.1657789945602417,
+      "rewards/margins": 0.10218825191259384,
+      "rewards/rejected": -0.26796722412109375,
       "step": 40
     },
     {
-      "epoch": 0.33,
-      "grad_norm": 1141.2520247434832,
-      "learning_rate": 8.431208189343668e-09,
-      "logits/chosen": -4.198305606842041,
-      "logits/rejected": -4.367269992828369,
-      "logps/chosen": -333.6199645996094,
-      "logps/rejected": -308.95989990234375,
-      "loss": 0.7163,
-      "rewards/accuracies": 0.4906249940395355,
-      "rewards/chosen": 0.06264184415340424,
-      "rewards/margins": -0.024443484842777252,
-      "rewards/rejected": 0.08708532154560089,
+      "epoch": 0.26,
+      "grad_norm": 41.60011182654463,
+      "learning_rate": 2.7605161074568387e-07,
+      "logits/chosen": -2.921407461166382,
+      "logits/rejected": -2.891125202178955,
+      "logps/chosen": -1.28288733959198,
+      "logps/rejected": -1.3418314456939697,
+      "loss": 1.7131,
+      "rewards/accuracies": 0.768750011920929,
+      "rewards/chosen": -0.4241597056388855,
+      "rewards/margins": 0.17930208146572113,
+      "rewards/rejected": -0.603461742401123,
       "step": 50
     },
     {
-      "epoch": 0.4,
-      "grad_norm": 1099.3679350302498,
-      "learning_rate": 7.500000000000001e-09,
-      "logits/chosen": -4.187924385070801,
-      "logits/rejected": -4.2703022956848145,
-      "logps/chosen": -323.7719421386719,
-      "logps/rejected": -308.23748779296875,
-      "loss": 0.7118,
-      "rewards/accuracies": 0.515625,
-      "rewards/chosen": 0.11530591547489166,
-      "rewards/margins": 0.03647974878549576,
-      "rewards/rejected": 0.07882615178823471,
+      "epoch": 0.32,
+      "grad_norm": 51.7670369092107,
+      "learning_rate": 2.5896503610243364e-07,
+      "logits/chosen": -3.184028148651123,
+      "logits/rejected": -3.152650833129883,
+      "logps/chosen": -1.467505693435669,
+      "logps/rejected": -1.6431095600128174,
+      "loss": 1.6172,
+      "rewards/accuracies": 0.7718750238418579,
+      "rewards/chosen": -0.7787442207336426,
+      "rewards/margins": 0.3277961313724518,
+      "rewards/rejected": -1.106540322303772,
       "step": 60
     },
     {
-      "epoch": 0.47,
-      "grad_norm": 1096.6613245075011,
-      "learning_rate": 6.434016163555451e-09,
-      "logits/chosen": -4.1484785079956055,
-      "logits/rejected": -4.303661346435547,
-      "logps/chosen": -344.68658447265625,
-      "logps/rejected": -314.47064208984375,
-      "loss": 0.6973,
-      "rewards/accuracies": 0.5625,
-      "rewards/chosen": 0.1820925623178482,
-      "rewards/margins": 0.10691970586776733,
-      "rewards/rejected": 0.07517284899950027,
+      "epoch": 0.37,
+      "grad_norm": 73.28833563129433,
+      "learning_rate": 2.3816778784387096e-07,
+      "logits/chosen": -3.472806930541992,
+      "logits/rejected": -3.4669137001037598,
+      "logps/chosen": -1.7310292720794678,
+      "logps/rejected": -1.9755589962005615,
+      "loss": 1.552,
+      "rewards/accuracies": 0.75,
+      "rewards/chosen": -1.4173996448516846,
+      "rewards/margins": 0.45889949798583984,
+      "rewards/rejected": -1.8762991428375244,
       "step": 70
     },
     {
-      "epoch": 0.53,
-      "grad_norm": 1050.1170453783911,
-      "learning_rate": 5.290724144552379e-09,
-      "logits/chosen": -4.223504543304443,
-      "logits/rejected": -4.3897480964660645,
-      "logps/chosen": -334.43511962890625,
-      "logps/rejected": -305.484375,
-      "loss": 0.6912,
-      "rewards/accuracies": 0.559374988079071,
-      "rewards/chosen": 0.20204909145832062,
-      "rewards/margins": 0.08096315711736679,
-      "rewards/rejected": 0.12108592689037323,
+      "epoch": 0.42,
+      "grad_norm": 95.75717630614442,
+      "learning_rate": 2.1436809131589132e-07,
+      "logits/chosen": -3.7732110023498535,
+      "logits/rejected": -3.794734477996826,
+      "logps/chosen": -2.3010740280151367,
+      "logps/rejected": -2.5936408042907715,
+      "loss": 1.4579,
+      "rewards/accuracies": 0.7093750238418579,
+      "rewards/chosen": -2.5355091094970703,
+      "rewards/margins": 0.5785154700279236,
+      "rewards/rejected": -3.1140246391296387,
       "step": 80
     },
     {
-      "epoch": 0.6,
-      "grad_norm": 1080.8957995779926,
-      "learning_rate": 4.131759111665349e-09,
-      "logits/chosen": -4.228762626647949,
-      "logits/rejected": -4.349400997161865,
-      "logps/chosen": -327.1580810546875,
-      "logps/rejected": -307.87689208984375,
-      "loss": 0.674,
-      "rewards/accuracies": 0.550000011920929,
-      "rewards/chosen": 0.24468111991882324,
-      "rewards/margins": 0.05739554762840271,
-      "rewards/rejected": 0.18728554248809814,
+      "epoch": 0.47,
+      "grad_norm": 108.99770590059093,
+      "learning_rate": 1.8837641663916534e-07,
+      "logits/chosen": -4.047728061676025,
+      "logits/rejected": -4.063739776611328,
+      "logps/chosen": -2.8391122817993164,
+      "logps/rejected": -3.388620376586914,
+      "loss": 1.3563,
+      "rewards/accuracies": 0.734375,
+      "rewards/chosen": -3.563304901123047,
+      "rewards/margins": 1.0540244579315186,
+      "rewards/rejected": -4.617329120635986,
       "step": 90
     },
     {
-      "epoch": 0.67,
-      "grad_norm": 1086.0982504773885,
-      "learning_rate": 3.0196011698042157e-09,
-      "logits/chosen": -4.210589408874512,
-      "logits/rejected": -4.420603275299072,
-      "logps/chosen": -320.5531311035156,
-      "logps/rejected": -289.7651062011719,
-      "loss": 0.6799,
-      "rewards/accuracies": 0.578125,
-      "rewards/chosen": 0.2572989761829376,
-      "rewards/margins": 0.10579316318035126,
-      "rewards/rejected": 0.15150579810142517,
+      "epoch": 0.53,
+      "grad_norm": 91.4347139330851,
+      "learning_rate": 1.610778791212311e-07,
+      "logits/chosen": -4.239941596984863,
+      "logits/rejected": -4.29507303237915,
+      "logps/chosen": -2.653388738632202,
+      "logps/rejected": -3.2629570960998535,
+      "loss": 1.3233,
+      "rewards/accuracies": 0.778124988079071,
+      "rewards/chosen": -3.28294038772583,
+      "rewards/margins": 1.1777704954147339,
+      "rewards/rejected": -4.4607110023498535,
       "step": 100
     },
     {
-      "epoch": 0.73,
-      "grad_norm": 1114.0041176823654,
-      "learning_rate": 2.0142070414860704e-09,
-      "logits/chosen": -4.225996971130371,
-      "logits/rejected": -4.2789506912231445,
-      "logps/chosen": -314.6085205078125,
-      "logps/rejected": -303.3541564941406,
-      "loss": 0.6851,
-      "rewards/accuracies": 0.546875,
-      "rewards/chosen": 0.27688390016555786,
-      "rewards/margins": 0.041596584022045135,
-      "rewards/rejected": 0.23528733849525452,
+      "epoch": 0.58,
+      "grad_norm": 131.69525214561466,
+      "learning_rate": 1.3340209771627488e-07,
+      "logits/chosen": -4.439741611480713,
+      "logits/rejected": -4.547017574310303,
+      "logps/chosen": -2.945267915725708,
+      "logps/rejected": -3.710526943206787,
+      "loss": 1.2097,
+      "rewards/accuracies": 0.778124988079071,
+      "rewards/chosen": -3.7780189514160156,
+      "rewards/margins": 1.5611803531646729,
+      "rewards/rejected": -5.339199542999268,
       "step": 110
     },
     {
-      "epoch": 0.8,
-      "grad_norm": 1097.8240578626694,
-      "learning_rate": 1.1697777844051105e-09,
-      "logits/chosen": -4.1503801345825195,
-      "logits/rejected": -4.306635856628418,
-      "logps/chosen": -338.7808837890625,
-      "logps/rejected": -313.2768249511719,
-      "loss": 0.6758,
-      "rewards/accuracies": 0.5874999761581421,
-      "rewards/chosen": 0.3373282849788666,
-      "rewards/margins": 0.1483292281627655,
-      "rewards/rejected": 0.18899908661842346,
+      "epoch": 0.63,
+      "grad_norm": 119.65234559570642,
+      "learning_rate": 1.0629153796660131e-07,
+      "logits/chosen": -4.577895164489746,
+      "logits/rejected": -4.687996864318848,
+      "logps/chosen": -3.094693899154663,
+      "logps/rejected": -3.9398090839385986,
+      "loss": 1.1282,
+      "rewards/accuracies": 0.7749999761581421,
+      "rewards/chosen": -4.051518440246582,
+      "rewards/margins": 1.6234314441680908,
+      "rewards/rejected": -5.674950122833252,
       "step": 120
     },
     {
-      "epoch": 0.87,
-      "grad_norm": 1113.3294478605874,
-      "learning_rate": 5.318367983829391e-10,
-      "logits/chosen": -4.178295135498047,
-      "logits/rejected": -4.3724284172058105,
-      "logps/chosen": -323.9390563964844,
-      "logps/rejected": -304.91119384765625,
-      "loss": 0.6643,
-      "rewards/accuracies": 0.574999988079071,
-      "rewards/chosen": 0.340619832277298,
-      "rewards/margins": 0.12920674681663513,
-      "rewards/rejected": 0.21141307055950165,
+      "epoch": 0.69,
+      "grad_norm": 129.7843781353164,
+      "learning_rate": 8.066941746895304e-08,
+      "logits/chosen": -4.77476167678833,
+      "logits/rejected": -4.9267988204956055,
+      "logps/chosen": -3.1812126636505127,
+      "logps/rejected": -4.0755157470703125,
+      "loss": 1.1661,
+      "rewards/accuracies": 0.78125,
+      "rewards/chosen": -4.220904350280762,
+      "rewards/margins": 1.8181953430175781,
+      "rewards/rejected": -6.03909969329834,
       "step": 130
     },
     {
-      "epoch": 0.93,
-      "grad_norm": 1112.5811910392042,
-      "learning_rate": 1.3477564710088098e-10,
-      "logits/chosen": -4.293918609619141,
-      "logits/rejected": -4.359633445739746,
-      "logps/chosen": -307.56317138671875,
-      "logps/rejected": -297.0579833984375,
-      "loss": 0.6697,
-      "rewards/accuracies": 0.596875011920929,
-      "rewards/chosen": 0.34358957409858704,
-      "rewards/margins": 0.11470258235931396,
-      "rewards/rejected": 0.22888696193695068,
+      "epoch": 0.74,
+      "grad_norm": 105.67370924537627,
+      "learning_rate": 5.7408266806531073e-08,
+      "logits/chosen": -4.906655311584473,
+      "logits/rejected": -5.071400165557861,
+      "logps/chosen": -3.282118320465088,
+      "logps/rejected": -4.2006425857543945,
+      "loss": 1.1872,
+      "rewards/accuracies": 0.778124988079071,
+      "rewards/chosen": -4.442627906799316,
+      "rewards/margins": 1.8388820886611938,
+      "rewards/rejected": -6.281510353088379,
       "step": 140
     },
     {
-      "epoch": 1.0,
-      "grad_norm": 1082.9191655386894,
-      "learning_rate": 0.0,
-      "logits/chosen": -4.258730411529541,
-      "logits/rejected": -4.332475185394287,
-      "logps/chosen": -312.3280029296875,
-      "logps/rejected": -300.03082275390625,
-      "loss": 0.6661,
-      "rewards/accuracies": 0.574999988079071,
-      "rewards/chosen": 0.3249002993106842,
-      "rewards/margins": 0.10186745971441269,
-      "rewards/rejected": 0.2230328619480133,
+      "epoch": 0.79,
+      "grad_norm": 108.86520544080616,
+      "learning_rate": 3.730021656646899e-08,
+      "logits/chosen": -5.13236141204834,
+      "logits/rejected": -5.272187232971191,
+      "logps/chosen": -3.555389881134033,
+      "logps/rejected": -4.640769958496094,
+      "loss": 1.1411,
+      "rewards/accuracies": 0.7718750238418579,
+      "rewards/chosen": -5.0327253341674805,
+      "rewards/margins": 2.142836332321167,
+      "rewards/rejected": -7.175561428070068,
       "step": 150
     },
+    {
+      "epoch": 0.84,
+      "grad_norm": 104.57387509776791,
+      "learning_rate": 2.103002228274413e-08,
+      "logits/chosen": -4.998870372772217,
+      "logits/rejected": -5.1569671630859375,
+      "logps/chosen": -3.4073867797851562,
+      "logps/rejected": -4.47383975982666,
+      "loss": 1.1059,
+      "rewards/accuracies": 0.7718750238418579,
+      "rewards/chosen": -4.684414386749268,
+      "rewards/margins": 2.137641429901123,
+      "rewards/rejected": -6.822054862976074,
+      "step": 160
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 109.01559488214998,
+      "learning_rate": 9.151745907741537e-09,
+      "logits/chosen": -5.156809329986572,
+      "logits/rejected": -5.336338520050049,
+      "logps/chosen": -3.476139545440674,
+      "logps/rejected": -4.648383617401123,
+      "loss": 1.113,
+      "rewards/accuracies": 0.8125,
+      "rewards/chosen": -4.8243327140808105,
+      "rewards/margins": 2.336613178253174,
+      "rewards/rejected": -7.160945892333984,
+      "step": 170
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 149.6092511595762,
+      "learning_rate": 2.069887896989614e-09,
+      "logits/chosen": -5.088743209838867,
+      "logits/rejected": -5.213286399841309,
+      "logps/chosen": -3.5473315715789795,
+      "logps/rejected": -4.448221206665039,
+      "loss": 1.113,
+      "rewards/accuracies": 0.753125011920929,
+      "rewards/chosen": -5.046046733856201,
+      "rewards/margins": 1.7532546520233154,
+      "rewards/rejected": -6.799302101135254,
+      "step": 180
+    },
     {
       "epoch": 1.0,
-      "step": 150,
+      "step": 189,
       "total_flos": 0.0,
-      "train_loss": 0.6950656716028849,
-      "train_runtime": 4446.5407,
-      "train_samples_per_second": 8.646,
-      "train_steps_per_second": 0.034
+      "train_loss": 1.3861158726707337,
+      "train_runtime": 5367.071,
+      "train_samples_per_second": 9.042,
+      "train_steps_per_second": 0.035
     }
   ],
   "logging_steps": 10,
-  "max_steps": 150,
+  "max_steps": 189,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e370280be22422145d741eb20d38b32314505f586945952ee65047093ae07be1
+oid sha256:d677e274df19d2bbf6b42de6cc0260c9ddcb86589cf3edab1fca386d8bd8d657
 size 6264
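training_args.bin is the TrainingArguments object pickled by Trainer; here only its hash changes while the pointer records the same 6264-byte size. With the PyTorch version listed above (2.1.2) it can be inspected as sketched below; newer PyTorch releases may require passing weights_only=False, since this is a pickle rather than a tensor file.

```python
# Sketch: inspect the saved TrainingArguments (assumes the file was pulled via git-lfs).
import torch

args = torch.load("training_args.bin")
print(args.learning_rate, args.per_device_train_batch_size, args.seed)
```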