RikkiXu commited on
Commit
1c9e834
1 Parent(s): f21341d

Model save

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # zephyr-7b-dpo-full
15
 
16
- This model was trained from scratch on the None dataset.
17
 
18
  ## Model description
19
 
@@ -32,7 +32,7 @@ More information needed
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
- - learning_rate: 5e-07
36
  - train_batch_size: 8
37
  - eval_batch_size: 8
38
  - seed: 42
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
52
 
53
  ### Framework versions
54
 
55
- - Transformers 4.41.1
56
  - Pytorch 2.1.2+cu118
57
- - Datasets 2.16.1
58
  - Tokenizers 0.19.1
 
13
 
14
  # zephyr-7b-dpo-full
15
 
16
+ This model was trained from scratch on an unknown dataset.
17
 
18
  ## Model description
19
 
 
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
+ - learning_rate: 5e-08
36
  - train_batch_size: 8
37
  - eval_batch_size: 8
38
  - seed: 42
 
52
 
53
  ### Framework versions
54
 
55
+ - Transformers 4.40.2
56
  - Pytorch 2.1.2+cu118
57
+ - Datasets 2.19.1
58
  - Tokenizers 0.19.1
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.4268451908656529,
5
- "train_runtime": 5571.7779,
6
- "train_samples": 44755,
7
- "train_samples_per_second": 8.032,
8
- "train_steps_per_second": 0.031
9
  }
 
1
  {
2
+ "epoch": 0.9984,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.3884877807054764,
5
+ "train_runtime": 4677.6403,
6
+ "train_samples": 39942,
7
+ "train_samples_per_second": 8.539,
8
+ "train_steps_per_second": 0.033
9
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/mnt/bn/xuruijie-llm/checkpoints/new_world/v1-ultral",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
@@ -20,7 +20,7 @@
20
  "sliding_window": 4096,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
- "transformers_version": "4.38.2",
24
  "use_cache": false,
25
  "vocab_size": 32002
26
  }
 
1
  {
2
+ "_name_or_path": "/mnt/bn/xuruijie-llm/checkpoints/new_world/v1-distill-round2-0.5-lr58",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
 
20
  "sliding_window": 4096,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.40.2",
24
  "use_cache": false,
25
  "vocab_size": 32002
26
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 32000,
5
- "transformers_version": "4.41.1"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 32000,
5
+ "transformers_version": "4.40.2"
6
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee8452a398b257c4a92a8643d9b1dfe39769d290e7c90c76b86b619836911c45
3
  size 4943178720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dba7919b04fd1d70e11a29220f5eaf634bb315409f9a94f7a263955001973c6
3
  size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee6a898b9636aed352abe6efb776fc9d203259465cf8a6ffff1063f02fa01257
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a84b4be1c4d4aad2ce051a91acda1c33720055694a806a6fb36a24f1b682e2c3
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f22fcd73a88b87f23b169dac783932266e459f67992280e6211fdc500dbf6a1d
3
  size 4540532728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:509b7c42a2681737189e49583a27d65db54dd70bda3ef6f8890b6b3e93dca2ff
3
  size 4540532728
runs/Jun05_23-14-01_n136-112-146/events.out.tfevents.1717600971.n136-112-146.144950.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6883c7dfa3a2614d7aa81b49faaa4c0dffd0e073132fd7768695d7d2f356efa9
3
- size 12459
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:144d2d387ee490ada89e7fd4e62d2de93031ca38ce217daf8e837f480121d7d9
3
+ size 16229
tokenizer.json CHANGED
@@ -152,6 +152,7 @@
152
  "end_of_word_suffix": null,
153
  "fuse_unk": true,
154
  "byte_fallback": true,
 
155
  "vocab": {
156
  "<unk>": 0,
157
  "<s>": 1,
 
152
  "end_of_word_suffix": null,
153
  "fuse_unk": true,
154
  "byte_fallback": true,
155
+ "ignore_merges": false,
156
  "vocab": {
157
  "<unk>": 0,
158
  "<s>": 1,
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.4268451908656529,
5
- "train_runtime": 5571.7779,
6
- "train_samples": 44755,
7
- "train_samples_per_second": 8.032,
8
- "train_steps_per_second": 0.031
9
  }
 
1
  {
2
+ "epoch": 0.9984,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.3884877807054764,
5
+ "train_runtime": 4677.6403,
6
+ "train_samples": 39942,
7
+ "train_samples_per_second": 8.539,
8
+ "train_steps_per_second": 0.033
9
  }
trainer_state.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 175,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.005714285714285714,
13
- "grad_norm": 1251.908438964567,
14
- "learning_rate": 2.7777777777777774e-08,
15
- "logits/chosen": -4.099947929382324,
16
- "logits/rejected": -4.528928756713867,
17
- "logps/chosen": -297.4884033203125,
18
- "logps/rejected": -227.07449340820312,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
@@ -24,287 +24,245 @@
24
  "step": 1
25
  },
26
  {
27
- "epoch": 0.05714285714285714,
28
- "grad_norm": 1007.5595895273253,
29
- "learning_rate": 2.7777777777777776e-07,
30
- "logits/chosen": -4.256350040435791,
31
- "logits/rejected": -4.503963947296143,
32
- "logps/chosen": -316.07769775390625,
33
- "logps/rejected": -254.57467651367188,
34
- "loss": 0.6613,
35
- "rewards/accuracies": 0.5034722089767456,
36
- "rewards/chosen": 0.2096220850944519,
37
- "rewards/margins": 0.15642070770263672,
38
- "rewards/rejected": 0.0532013401389122,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.11428571428571428,
43
- "grad_norm": 675.3841086149566,
44
- "learning_rate": 4.997998237821233e-07,
45
- "logits/chosen": -4.360010623931885,
46
- "logits/rejected": -4.628513813018799,
47
- "logps/chosen": -298.9122009277344,
48
- "logps/rejected": -249.00918579101562,
49
- "loss": 0.4212,
50
- "rewards/accuracies": 0.800000011920929,
51
- "rewards/chosen": 3.4100475311279297,
52
- "rewards/margins": 2.592763900756836,
53
- "rewards/rejected": 0.8172838091850281,
54
  "step": 20
55
  },
56
  {
57
- "epoch": 0.17142857142857143,
58
- "grad_norm": 675.1555265980924,
59
- "learning_rate": 4.928272579403969e-07,
60
- "logits/chosen": -4.373316287994385,
61
- "logits/rejected": -4.6160383224487305,
62
- "logps/chosen": -303.8053894042969,
63
- "logps/rejected": -266.44818115234375,
64
- "loss": 0.4762,
65
- "rewards/accuracies": 0.824999988079071,
66
- "rewards/chosen": 4.110724449157715,
67
- "rewards/margins": 6.506677150726318,
68
- "rewards/rejected": -2.3959527015686035,
69
  "step": 30
70
  },
71
  {
72
- "epoch": 0.22857142857142856,
73
- "grad_norm": 600.7834437052495,
74
- "learning_rate": 4.7616414547743854e-07,
75
- "logits/chosen": -4.35813045501709,
76
- "logits/rejected": -4.55276346206665,
77
- "logps/chosen": -289.32257080078125,
78
- "logps/rejected": -250.25341796875,
79
- "loss": 0.5303,
80
- "rewards/accuracies": 0.8343750238418579,
81
- "rewards/chosen": 3.9142494201660156,
82
- "rewards/margins": 7.811418056488037,
83
- "rewards/rejected": -3.897169589996338,
84
  "step": 40
85
  },
86
  {
87
- "epoch": 0.2857142857142857,
88
- "grad_norm": 788.2365830395779,
89
- "learning_rate": 4.5047546391491e-07,
90
- "logits/chosen": -4.276906967163086,
91
- "logits/rejected": -4.5039567947387695,
92
- "logps/chosen": -297.9548034667969,
93
- "logps/rejected": -260.8029479980469,
94
- "loss": 0.4673,
95
- "rewards/accuracies": 0.8531249761581421,
96
- "rewards/chosen": 4.0600905418396,
97
- "rewards/margins": 7.4909186363220215,
98
- "rewards/rejected": -3.430828094482422,
99
  "step": 50
100
  },
101
  {
102
- "epoch": 0.34285714285714286,
103
- "grad_norm": 715.3302112367288,
104
- "learning_rate": 4.167863756189767e-07,
105
- "logits/chosen": -4.322784900665283,
106
- "logits/rejected": -4.564073085784912,
107
- "logps/chosen": -293.1005554199219,
108
- "logps/rejected": -254.21835327148438,
109
- "loss": 0.4621,
110
- "rewards/accuracies": 0.893750011920929,
111
- "rewards/chosen": 5.134177207946777,
112
- "rewards/margins": 7.875572204589844,
113
- "rewards/rejected": -2.741394519805908,
114
  "step": 60
115
  },
116
  {
117
- "epoch": 0.4,
118
- "grad_norm": 633.4373267135044,
119
- "learning_rate": 3.764413164801049e-07,
120
- "logits/chosen": -4.282719612121582,
121
- "logits/rejected": -4.559357643127441,
122
- "logps/chosen": -287.670166015625,
123
- "logps/rejected": -240.59359741210938,
124
- "loss": 0.3978,
125
- "rewards/accuracies": 0.8812500238418579,
126
- "rewards/chosen": 4.107884407043457,
127
- "rewards/margins": 7.409787654876709,
128
- "rewards/rejected": -3.301903247833252,
129
  "step": 70
130
  },
131
  {
132
- "epoch": 0.45714285714285713,
133
- "grad_norm": 585.2050073161457,
134
- "learning_rate": 3.3105034329273217e-07,
135
- "logits/chosen": -4.1827239990234375,
136
- "logits/rejected": -4.454409599304199,
137
- "logps/chosen": -294.9931640625,
138
- "logps/rejected": -253.04092407226562,
139
- "loss": 0.4435,
140
- "rewards/accuracies": 0.828125,
141
- "rewards/chosen": 4.6706342697143555,
142
- "rewards/margins": 7.3439764976501465,
143
- "rewards/rejected": -2.67334246635437,
144
  "step": 80
145
  },
146
  {
147
- "epoch": 0.5142857142857142,
148
- "grad_norm": 744.3685065355755,
149
- "learning_rate": 2.8242488095860204e-07,
150
- "logits/chosen": -4.200292110443115,
151
- "logits/rejected": -4.432915210723877,
152
- "logps/chosen": -289.46466064453125,
153
- "logps/rejected": -249.84048461914062,
154
- "loss": 0.4059,
155
- "rewards/accuracies": 0.8656250238418579,
156
- "rewards/chosen": 5.276065349578857,
157
- "rewards/margins": 7.3948163986206055,
158
- "rewards/rejected": -2.118751287460327,
159
  "step": 90
160
  },
161
  {
162
- "epoch": 0.5714285714285714,
163
- "grad_norm": 671.6213883992457,
164
- "learning_rate": 2.3250543366050071e-07,
165
- "logits/chosen": -4.27265739440918,
166
- "logits/rejected": -4.471877098083496,
167
- "logps/chosen": -299.2139892578125,
168
- "logps/rejected": -262.4172668457031,
169
- "loss": 0.3587,
170
- "rewards/accuracies": 0.8812500238418579,
171
- "rewards/chosen": 5.20701265335083,
172
- "rewards/margins": 7.200909614562988,
173
- "rewards/rejected": -1.9938958883285522,
174
  "step": 100
175
  },
176
  {
177
- "epoch": 0.6285714285714286,
178
- "grad_norm": 731.7704645558294,
179
- "learning_rate": 1.8328414484826743e-07,
180
- "logits/chosen": -4.198658466339111,
181
- "logits/rejected": -4.5151848793029785,
182
- "logps/chosen": -299.356689453125,
183
- "logps/rejected": -248.8483428955078,
184
- "loss": 0.3809,
185
- "rewards/accuracies": 0.893750011920929,
186
- "rewards/chosen": 5.6889519691467285,
187
- "rewards/margins": 7.738437652587891,
188
- "rewards/rejected": -2.049485683441162,
189
  "step": 110
190
  },
191
  {
192
- "epoch": 0.6857142857142857,
193
- "grad_norm": 763.5659705732334,
194
- "learning_rate": 1.3672529644823003e-07,
195
- "logits/chosen": -4.348945140838623,
196
- "logits/rejected": -4.604073524475098,
197
- "logps/chosen": -269.67547607421875,
198
- "logps/rejected": -229.8912811279297,
199
- "loss": 0.3889,
200
  "rewards/accuracies": 0.8656250238418579,
201
- "rewards/chosen": 4.305537700653076,
202
- "rewards/margins": 6.874751091003418,
203
- "rewards/rejected": -2.569213390350342,
204
  "step": 120
205
  },
206
  {
207
- "epoch": 0.7428571428571429,
208
- "grad_norm": 609.1110882142142,
209
- "learning_rate": 9.468691994696146e-08,
210
- "logits/chosen": -4.341274261474609,
211
- "logits/rejected": -4.5989298820495605,
212
- "logps/chosen": -275.35833740234375,
213
- "logps/rejected": -245.8115692138672,
214
- "loss": 0.3699,
215
- "rewards/accuracies": 0.871874988079071,
216
- "rewards/chosen": 4.084762096405029,
217
- "rewards/margins": 6.567566871643066,
218
- "rewards/rejected": -2.4828040599823,
219
  "step": 130
220
  },
221
  {
222
- "epoch": 0.8,
223
- "grad_norm": 833.6810153426302,
224
- "learning_rate": 5.884664762850466e-08,
225
- "logits/chosen": -4.367494106292725,
226
- "logits/rejected": -4.598031044006348,
227
- "logps/chosen": -273.83099365234375,
228
- "logps/rejected": -237.6991729736328,
229
- "loss": 0.381,
230
- "rewards/accuracies": 0.8656250238418579,
231
- "rewards/chosen": 4.479451656341553,
232
- "rewards/margins": 6.597804069519043,
233
- "rewards/rejected": -2.118351697921753,
234
  "step": 140
235
  },
236
  {
237
- "epoch": 0.8571428571428571,
238
- "grad_norm": 677.0337306190108,
239
- "learning_rate": 3.063476303172388e-08,
240
- "logits/chosen": -4.267864227294922,
241
- "logits/rejected": -4.488691806793213,
242
- "logps/chosen": -288.9342041015625,
243
- "logps/rejected": -247.3463592529297,
244
- "loss": 0.392,
245
- "rewards/accuracies": 0.8843749761581421,
246
- "rewards/chosen": 5.401379585266113,
247
- "rewards/margins": 7.644896030426025,
248
- "rewards/rejected": -2.2435173988342285,
249
  "step": 150
250
  },
251
  {
252
- "epoch": 0.9142857142857143,
253
- "grad_norm": 601.5321276048043,
254
- "learning_rate": 1.1177122393998372e-08,
255
- "logits/chosen": -4.331192970275879,
256
- "logits/rejected": -4.530573844909668,
257
- "logps/chosen": -277.52020263671875,
258
- "logps/rejected": -243.84323120117188,
259
- "loss": 0.363,
260
- "rewards/accuracies": 0.8687499761581421,
261
- "rewards/chosen": 4.819538593292236,
262
- "rewards/margins": 6.703360080718994,
263
- "rewards/rejected": -1.8838220834732056,
264
- "step": 160
265
- },
266
- {
267
- "epoch": 0.9714285714285714,
268
- "grad_norm": 771.9761619985129,
269
- "learning_rate": 1.2502249244298879e-09,
270
- "logits/chosen": -4.293517112731934,
271
- "logits/rejected": -4.548079013824463,
272
- "logps/chosen": -295.1907653808594,
273
- "logps/rejected": -248.191650390625,
274
- "loss": 0.3693,
275
- "rewards/accuracies": 0.8843749761581421,
276
- "rewards/chosen": 4.456015586853027,
277
- "rewards/margins": 6.605706214904785,
278
- "rewards/rejected": -2.1496901512145996,
279
- "step": 170
280
- },
281
- {
282
- "epoch": 1.0,
283
- "step": 175,
284
  "total_flos": 0.0,
285
- "train_loss": 0.4268451908656529,
286
- "train_runtime": 5571.7779,
287
- "train_samples_per_second": 8.032,
288
- "train_steps_per_second": 0.031
289
  }
290
  ],
291
  "logging_steps": 10,
292
- "max_steps": 175,
293
  "num_input_tokens_seen": 0,
294
  "num_train_epochs": 1,
295
  "save_steps": 100,
296
- "stateful_callbacks": {
297
- "TrainerControl": {
298
- "args": {
299
- "should_epoch_stop": false,
300
- "should_evaluate": false,
301
- "should_log": false,
302
- "should_save": true,
303
- "should_training_stop": false
304
- },
305
- "attributes": {}
306
- }
307
- },
308
  "total_flos": 0.0,
309
  "train_batch_size": 8,
310
  "trial_name": null,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9984,
5
  "eval_steps": 500,
6
+ "global_step": 156,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0064,
13
+ "grad_norm": 1341.8773394764246,
14
+ "learning_rate": 3.125e-09,
15
+ "logits/chosen": -3.9499800205230713,
16
+ "logits/rejected": -4.237819194793701,
17
+ "logps/chosen": -300.693115234375,
18
+ "logps/rejected": -249.96307373046875,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
 
24
  "step": 1
25
  },
26
  {
27
+ "epoch": 0.064,
28
+ "grad_norm": 1342.2810836893796,
29
+ "learning_rate": 3.125e-08,
30
+ "logits/chosen": -4.129705905914307,
31
+ "logits/rejected": -4.352028846740723,
32
+ "logps/chosen": -351.5079650878906,
33
+ "logps/rejected": -308.8138427734375,
34
+ "loss": 0.7326,
35
+ "rewards/accuracies": 0.3680555522441864,
36
+ "rewards/chosen": -0.04078766331076622,
37
+ "rewards/margins": -0.11378024518489838,
38
+ "rewards/rejected": 0.07299260050058365,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.128,
43
+ "grad_norm": 1252.3965895279962,
44
+ "learning_rate": 4.9899357349880975e-08,
45
+ "logits/chosen": -4.194980144500732,
46
+ "logits/rejected": -4.382790565490723,
47
+ "logps/chosen": -334.9039001464844,
48
+ "logps/rejected": -293.8416748046875,
49
+ "loss": 0.683,
50
+ "rewards/accuracies": 0.581250011920929,
51
+ "rewards/chosen": 0.22410114109516144,
52
+ "rewards/margins": 0.11712154000997543,
53
+ "rewards/rejected": 0.10697959363460541,
54
  "step": 20
55
  },
56
  {
57
+ "epoch": 0.192,
58
+ "grad_norm": 904.3776918610464,
59
+ "learning_rate": 4.877641290737884e-08,
60
+ "logits/chosen": -4.230466365814209,
61
+ "logits/rejected": -4.363996505737305,
62
+ "logps/chosen": -327.71453857421875,
63
+ "logps/rejected": -295.3287658691406,
64
+ "loss": 0.5498,
65
+ "rewards/accuracies": 0.7437499761581421,
66
+ "rewards/chosen": 0.9708820581436157,
67
+ "rewards/margins": 0.5084127187728882,
68
+ "rewards/rejected": 0.46246927976608276,
69
  "step": 30
70
  },
71
  {
72
+ "epoch": 0.256,
73
+ "grad_norm": 894.6327423356746,
74
+ "learning_rate": 4.646121984004665e-08,
75
+ "logits/chosen": -4.1493096351623535,
76
+ "logits/rejected": -4.351648807525635,
77
+ "logps/chosen": -330.09368896484375,
78
+ "logps/rejected": -288.2974853515625,
79
+ "loss": 0.4125,
80
+ "rewards/accuracies": 0.8218749761581421,
81
+ "rewards/chosen": 1.9414455890655518,
82
+ "rewards/margins": 1.1434320211410522,
83
+ "rewards/rejected": 0.7980135083198547,
84
  "step": 40
85
  },
86
  {
87
+ "epoch": 0.32,
88
+ "grad_norm": 706.4309708182283,
89
+ "learning_rate": 4.3069871595684784e-08,
90
+ "logits/chosen": -4.244365215301514,
91
+ "logits/rejected": -4.423664093017578,
92
+ "logps/chosen": -329.6412353515625,
93
+ "logps/rejected": -291.22528076171875,
94
+ "loss": 0.3694,
95
+ "rewards/accuracies": 0.840624988079071,
96
+ "rewards/chosen": 2.6057987213134766,
97
+ "rewards/margins": 1.537340521812439,
98
+ "rewards/rejected": 1.068458080291748,
99
  "step": 50
100
  },
101
  {
102
+ "epoch": 0.384,
103
+ "grad_norm": 679.6447682422123,
104
+ "learning_rate": 3.8772424536302564e-08,
105
+ "logits/chosen": -4.262530326843262,
106
+ "logits/rejected": -4.4340620040893555,
107
+ "logps/chosen": -320.7197570800781,
108
+ "logps/rejected": -291.15264892578125,
109
+ "loss": 0.3459,
110
+ "rewards/accuracies": 0.8343750238418579,
111
+ "rewards/chosen": 3.022132158279419,
112
+ "rewards/margins": 1.8344866037368774,
113
+ "rewards/rejected": 1.187645673751831,
114
  "step": 60
115
  },
116
  {
117
+ "epoch": 0.448,
118
+ "grad_norm": 600.9568341116722,
119
+ "learning_rate": 3.378437060203357e-08,
120
+ "logits/chosen": -4.188047885894775,
121
+ "logits/rejected": -4.377224445343018,
122
+ "logps/chosen": -320.23345947265625,
123
+ "logps/rejected": -288.5027770996094,
124
+ "loss": 0.3189,
125
+ "rewards/accuracies": 0.8187500238418579,
126
+ "rewards/chosen": 3.3037331104278564,
127
+ "rewards/margins": 2.1254096031188965,
128
+ "rewards/rejected": 1.1783230304718018,
129
  "step": 70
130
  },
131
  {
132
+ "epoch": 0.512,
133
+ "grad_norm": 654.7049863576665,
134
+ "learning_rate": 2.8355831645441387e-08,
135
+ "logits/chosen": -4.0522565841674805,
136
+ "logits/rejected": -4.341280937194824,
137
+ "logps/chosen": -345.8344421386719,
138
+ "logps/rejected": -307.4328918457031,
139
+ "loss": 0.3105,
140
+ "rewards/accuracies": 0.8999999761581421,
141
+ "rewards/chosen": 3.7246456146240234,
142
+ "rewards/margins": 2.5337729454040527,
143
+ "rewards/rejected": 1.1908724308013916,
144
  "step": 80
145
  },
146
  {
147
+ "epoch": 0.576,
148
+ "grad_norm": 638.1282144295093,
149
+ "learning_rate": 2.2759017277414164e-08,
150
+ "logits/chosen": -4.180428504943848,
151
+ "logits/rejected": -4.390549659729004,
152
+ "logps/chosen": -332.82275390625,
153
+ "logps/rejected": -295.1810607910156,
154
+ "loss": 0.3099,
155
+ "rewards/accuracies": 0.875,
156
+ "rewards/chosen": 3.2552542686462402,
157
+ "rewards/margins": 2.3172354698181152,
158
+ "rewards/rejected": 0.9380186796188354,
159
  "step": 90
160
  },
161
  {
162
+ "epoch": 0.64,
163
+ "grad_norm": 680.3285346474286,
164
+ "learning_rate": 1.7274575140626317e-08,
165
+ "logits/chosen": -4.167009353637695,
166
+ "logits/rejected": -4.386021614074707,
167
+ "logps/chosen": -330.049560546875,
168
+ "logps/rejected": -285.8011169433594,
169
+ "loss": 0.3123,
170
+ "rewards/accuracies": 0.8843749761581421,
171
+ "rewards/chosen": 3.6218514442443848,
172
+ "rewards/margins": 2.723836898803711,
173
+ "rewards/rejected": 0.8980148434638977,
174
  "step": 100
175
  },
176
  {
177
+ "epoch": 0.704,
178
+ "grad_norm": 616.2712616857408,
179
+ "learning_rate": 1.217751806485235e-08,
180
+ "logits/chosen": -4.145500183105469,
181
+ "logits/rejected": -4.386542320251465,
182
+ "logps/chosen": -311.7583923339844,
183
+ "logps/rejected": -276.3233947753906,
184
+ "loss": 0.3022,
185
+ "rewards/accuracies": 0.8843749761581421,
186
+ "rewards/chosen": 3.584909439086914,
187
+ "rewards/margins": 2.6118006706237793,
188
+ "rewards/rejected": 0.9731090664863586,
189
  "step": 110
190
  },
191
  {
192
+ "epoch": 0.768,
193
+ "grad_norm": 649.1888991009114,
194
+ "learning_rate": 7.723433775328384e-09,
195
+ "logits/chosen": -4.141805171966553,
196
+ "logits/rejected": -4.35054874420166,
197
+ "logps/chosen": -325.5559997558594,
198
+ "logps/rejected": -280.5980529785156,
199
+ "loss": 0.3033,
200
  "rewards/accuracies": 0.8656250238418579,
201
+ "rewards/chosen": 3.6838138103485107,
202
+ "rewards/margins": 2.6417319774627686,
203
+ "rewards/rejected": 1.0420820713043213,
204
  "step": 120
205
  },
206
  {
207
+ "epoch": 0.832,
208
+ "grad_norm": 747.4298760038148,
209
+ "learning_rate": 4.135668656967433e-09,
210
+ "logits/chosen": -4.228358268737793,
211
+ "logits/rejected": -4.38976526260376,
212
+ "logps/chosen": -331.02642822265625,
213
+ "logps/rejected": -286.7439880371094,
214
+ "loss": 0.3064,
215
+ "rewards/accuracies": 0.875,
216
+ "rewards/chosen": 3.7264277935028076,
217
+ "rewards/margins": 2.6530587673187256,
218
+ "rewards/rejected": 1.073369026184082,
219
  "step": 130
220
  },
221
  {
222
+ "epoch": 0.896,
223
+ "grad_norm": 697.5841535989922,
224
+ "learning_rate": 1.5941282340065698e-09,
225
+ "logits/chosen": -4.18213415145874,
226
+ "logits/rejected": -4.3970947265625,
227
+ "logps/chosen": -332.56500244140625,
228
+ "logps/rejected": -303.63543701171875,
229
+ "loss": 0.3069,
230
+ "rewards/accuracies": 0.856249988079071,
231
+ "rewards/chosen": 3.5617058277130127,
232
+ "rewards/margins": 2.6050186157226562,
233
+ "rewards/rejected": 0.9566874504089355,
234
  "step": 140
235
  },
236
  {
237
+ "epoch": 0.96,
238
+ "grad_norm": 567.1610784183449,
239
+ "learning_rate": 2.262559558016325e-10,
240
+ "logits/chosen": -4.118973731994629,
241
+ "logits/rejected": -4.348026752471924,
242
+ "logps/chosen": -339.0107116699219,
243
+ "logps/rejected": -295.09564208984375,
244
+ "loss": 0.3078,
245
+ "rewards/accuracies": 0.8656250238418579,
246
+ "rewards/chosen": 3.7477049827575684,
247
+ "rewards/margins": 2.61022686958313,
248
+ "rewards/rejected": 1.1374781131744385,
249
  "step": 150
250
  },
251
  {
252
+ "epoch": 0.9984,
253
+ "step": 156,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  "total_flos": 0.0,
255
+ "train_loss": 0.3884877807054764,
256
+ "train_runtime": 4677.6403,
257
+ "train_samples_per_second": 8.539,
258
+ "train_steps_per_second": 0.033
259
  }
260
  ],
261
  "logging_steps": 10,
262
+ "max_steps": 156,
263
  "num_input_tokens_seen": 0,
264
  "num_train_epochs": 1,
265
  "save_steps": 100,
 
 
 
 
 
 
 
 
 
 
 
 
266
  "total_flos": 0.0,
267
  "train_batch_size": 8,
268
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b92372e760a727f6582979303a96b389cee04d00a2f4a6c534c36c4b19bfdc3f
3
- size 6264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44368936e8a5f160f38c764acd78a1ed87cb99f1b31bc5a44994e052788c660a
3
+ size 6392