lewtun HF staff committed on
Commit
10b512a
1 Parent(s): 17e3ec2

Model save

Browse files
README.md ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ library_name: peft
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ base_model: mistralai/Mistral-7B-v0.1
9
+ model-index:
10
+ - name: zephyr-7b-dpo-qlora-fix
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # zephyr-7b-dpo-qlora-fix
18
+
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset (no dataset name was recorded when this card was generated).
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.5279
22
+ - Rewards/chosen: -1.0268
23
+ - Rewards/rejected: -1.8204
24
+ - Rewards/accuracies: 0.7617
25
+ - Rewards/margins: 0.7936
26
+ - Logps/rejected: -429.5990
27
+ - Logps/chosen: -349.1275
28
+ - Logits/rejected: 1.1048
29
+ - Logits/chosen: 1.1977
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 5e-06
49
+ - train_batch_size: 4
50
+ - eval_batch_size: 8
51
+ - seed: 42
52
+ - distributed_type: multi-GPU
53
+ - num_devices: 8
54
+ - gradient_accumulation_steps: 4
55
+ - total_train_batch_size: 128
56
+ - total_eval_batch_size: 64
57
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
+ - lr_scheduler_type: cosine
59
+ - lr_scheduler_warmup_ratio: 0.1
60
+ - num_epochs: 1
61
+
62
+ ### Training results
63
+
64
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
65
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
66
+ | 0.5985 | 0.21 | 100 | 0.6167 | -0.6622 | -0.9981 | 0.7031 | 0.3359 | -347.3664 | -312.6618 | -2.0061 | -1.9992 |
67
+ | 0.5302 | 0.42 | 200 | 0.5495 | -0.8758 | -1.5987 | 0.7461 | 0.7229 | -407.4292 | -334.0204 | 0.3116 | 0.4001 |
68
+ | 0.533 | 0.63 | 300 | 0.5384 | -0.8142 | -1.5157 | 0.7617 | 0.7016 | -399.1313 | -327.8605 | 0.5716 | 0.6809 |
69
+ | 0.518 | 0.84 | 400 | 0.5276 | -1.0554 | -1.8498 | 0.75 | 0.7944 | -432.5438 | -351.9892 | 1.1053 | 1.1955 |
70
+
71
+
72
+ ### Framework versions
73
+
74
+ - PEFT 0.7.1
75
+ - Transformers 4.36.2
76
+ - PyTorch 2.1.2+cu121
77
+ - Datasets 2.14.6
78
+ - Tokenizers 0.15.1
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4e2bb7e9f58d41b55eb5c27c1081cc15bd4255a67f81d706fe057c54cc224a0
3
  size 671150064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5b8c0474916d9bca6c774bf485ef22df5555578bb9f71a82560b0ab9c539161
3
  size 671150064
all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": 1.1976563930511475,
4
+ "eval_logits/rejected": 1.104769229888916,
5
+ "eval_logps/chosen": -349.1275329589844,
6
+ "eval_logps/rejected": -429.5989685058594,
7
+ "eval_loss": 0.5278915762901306,
8
+ "eval_rewards/accuracies": 0.76171875,
9
+ "eval_rewards/chosen": -1.0268259048461914,
10
+ "eval_rewards/margins": 0.7935623526573181,
11
+ "eval_rewards/rejected": -1.8203881978988647,
12
+ "eval_runtime": 72.3078,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 27.66,
15
+ "eval_steps_per_second": 0.443,
16
+ "train_loss": 0.5624207920498319,
17
+ "train_runtime": 4926.1363,
18
+ "train_samples": 61135,
19
+ "train_samples_per_second": 12.41,
20
+ "train_steps_per_second": 0.097
21
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": 1.1976563930511475,
4
+ "eval_logits/rejected": 1.104769229888916,
5
+ "eval_logps/chosen": -349.1275329589844,
6
+ "eval_logps/rejected": -429.5989685058594,
7
+ "eval_loss": 0.5278915762901306,
8
+ "eval_rewards/accuracies": 0.76171875,
9
+ "eval_rewards/chosen": -1.0268259048461914,
10
+ "eval_rewards/margins": 0.7935623526573181,
11
+ "eval_rewards/rejected": -1.8203881978988647,
12
+ "eval_runtime": 72.3078,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 27.66,
15
+ "eval_steps_per_second": 0.443
16
+ }
runs/Feb02_15-24-17_ip-26-0-165-38/events.out.tfevents.1706887790.ip-26-0-165-38.1953627.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9936796d69e4acc42092576ad36b369aeb32c347b5b164a003e409408ed47f72
3
- size 33729
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6faf2dee1587b724e87ddf6a767e4d2fd4a4c803d204209137bf6fcdefeeec
3
+ size 38521
runs/Feb02_15-24-17_ip-26-0-165-38/events.out.tfevents.1706892789.ip-26-0-165-38.1953627.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6855d5dcdc3a4e7ceca071704d97d4da0fe2b39d1331a42a9d82f1b7420090f9
3
+ size 828
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.5624207920498319,
4
+ "train_runtime": 4926.1363,
5
+ "train_samples": 61135,
6
+ "train_samples_per_second": 12.41,
7
+ "train_steps_per_second": 0.097
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,766 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9984301412872841,
5
+ "eval_steps": 100,
6
+ "global_step": 477,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 1.0416666666666667e-07,
14
+ "logits/chosen": -2.5889174938201904,
15
+ "logits/rejected": -2.4813222885131836,
16
+ "logps/chosen": -289.8450622558594,
17
+ "logps/rejected": -264.9564514160156,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.02,
27
+ "learning_rate": 1.0416666666666667e-06,
28
+ "logits/chosen": -2.388092517852783,
29
+ "logits/rejected": -2.4257497787475586,
30
+ "logps/chosen": -260.3330078125,
31
+ "logps/rejected": -219.36460876464844,
32
+ "loss": 0.6925,
33
+ "rewards/accuracies": 0.5277777910232544,
34
+ "rewards/chosen": 0.006709899753332138,
35
+ "rewards/margins": 0.001456400495953858,
36
+ "rewards/rejected": 0.005253499373793602,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.04,
41
+ "learning_rate": 2.0833333333333334e-06,
42
+ "logits/chosen": -2.4777274131774902,
43
+ "logits/rejected": -2.4627153873443604,
44
+ "logps/chosen": -269.85845947265625,
45
+ "logps/rejected": -241.0397186279297,
46
+ "loss": 0.6883,
47
+ "rewards/accuracies": 0.6187499761581421,
48
+ "rewards/chosen": 0.036987464874982834,
49
+ "rewards/margins": 0.010376101359724998,
50
+ "rewards/rejected": 0.026611363515257835,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.06,
55
+ "learning_rate": 3.125e-06,
56
+ "logits/chosen": -2.4684128761291504,
57
+ "logits/rejected": -2.4569170475006104,
58
+ "logps/chosen": -248.72634887695312,
59
+ "logps/rejected": -245.45999145507812,
60
+ "loss": 0.6814,
61
+ "rewards/accuracies": 0.625,
62
+ "rewards/chosen": 0.04978276044130325,
63
+ "rewards/margins": 0.02046312391757965,
64
+ "rewards/rejected": 0.029319632798433304,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.08,
69
+ "learning_rate": 4.166666666666667e-06,
70
+ "logits/chosen": -2.350592613220215,
71
+ "logits/rejected": -2.3819007873535156,
72
+ "logps/chosen": -286.8092041015625,
73
+ "logps/rejected": -258.41522216796875,
74
+ "loss": 0.6638,
75
+ "rewards/accuracies": 0.699999988079071,
76
+ "rewards/chosen": 0.04874344915151596,
77
+ "rewards/margins": 0.06839489936828613,
78
+ "rewards/rejected": -0.019651446491479874,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.1,
83
+ "learning_rate": 4.999731868769027e-06,
84
+ "logits/chosen": -2.371358871459961,
85
+ "logits/rejected": -2.3549230098724365,
86
+ "logps/chosen": -286.4510192871094,
87
+ "logps/rejected": -292.4720153808594,
88
+ "loss": 0.6532,
89
+ "rewards/accuracies": 0.675000011920929,
90
+ "rewards/chosen": -0.040428485721349716,
91
+ "rewards/margins": 0.11083565652370453,
92
+ "rewards/rejected": -0.15126413106918335,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.13,
97
+ "learning_rate": 4.9903533134293035e-06,
98
+ "logits/chosen": -2.3133959770202637,
99
+ "logits/rejected": -2.2497916221618652,
100
+ "logps/chosen": -266.2447204589844,
101
+ "logps/rejected": -250.51513671875,
102
+ "loss": 0.6381,
103
+ "rewards/accuracies": 0.6875,
104
+ "rewards/chosen": -0.177861288189888,
105
+ "rewards/margins": 0.15977302193641663,
106
+ "rewards/rejected": -0.33763426542282104,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.15,
111
+ "learning_rate": 4.967625656594782e-06,
112
+ "logits/chosen": -2.359027624130249,
113
+ "logits/rejected": -2.3221073150634766,
114
+ "logps/chosen": -293.37640380859375,
115
+ "logps/rejected": -272.8565979003906,
116
+ "loss": 0.6222,
117
+ "rewards/accuracies": 0.637499988079071,
118
+ "rewards/chosen": -0.28658825159072876,
119
+ "rewards/margins": 0.1545184701681137,
120
+ "rewards/rejected": -0.4411067068576813,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.17,
125
+ "learning_rate": 4.93167072587771e-06,
126
+ "logits/chosen": -2.2765626907348633,
127
+ "logits/rejected": -2.2555174827575684,
128
+ "logps/chosen": -295.1297607421875,
129
+ "logps/rejected": -313.0910339355469,
130
+ "loss": 0.6196,
131
+ "rewards/accuracies": 0.706250011920929,
132
+ "rewards/chosen": -0.2227451503276825,
133
+ "rewards/margins": 0.2126256674528122,
134
+ "rewards/rejected": -0.43537086248397827,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.19,
139
+ "learning_rate": 4.882681251368549e-06,
140
+ "logits/chosen": -2.245281219482422,
141
+ "logits/rejected": -2.227741241455078,
142
+ "logps/chosen": -269.5973205566406,
143
+ "logps/rejected": -291.8226318359375,
144
+ "loss": 0.5989,
145
+ "rewards/accuracies": 0.699999988079071,
146
+ "rewards/chosen": -0.3465212285518646,
147
+ "rewards/margins": 0.26312902569770813,
148
+ "rewards/rejected": -0.6096502542495728,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.21,
153
+ "learning_rate": 4.8209198325401815e-06,
154
+ "logits/chosen": -2.2300758361816406,
155
+ "logits/rejected": -2.1660382747650146,
156
+ "logps/chosen": -315.1083068847656,
157
+ "logps/rejected": -333.6819763183594,
158
+ "loss": 0.5985,
159
+ "rewards/accuracies": 0.699999988079071,
160
+ "rewards/chosen": -0.5296173691749573,
161
+ "rewards/margins": 0.28275665640830994,
162
+ "rewards/rejected": -0.8123741149902344,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.21,
167
+ "eval_logits/chosen": -1.999224066734314,
168
+ "eval_logits/rejected": -2.0060811042785645,
169
+ "eval_logps/chosen": -312.6618347167969,
170
+ "eval_logps/rejected": -347.36639404296875,
171
+ "eval_loss": 0.6166529059410095,
172
+ "eval_rewards/accuracies": 0.703125,
173
+ "eval_rewards/chosen": -0.6621690392494202,
174
+ "eval_rewards/margins": 0.3358937203884125,
175
+ "eval_rewards/rejected": -0.9980627298355103,
176
+ "eval_runtime": 73.6215,
177
+ "eval_samples_per_second": 27.166,
178
+ "eval_steps_per_second": 0.435,
179
+ "step": 100
180
+ },
181
+ {
182
+ "epoch": 0.23,
183
+ "learning_rate": 4.746717530629565e-06,
184
+ "logits/chosen": -2.0477728843688965,
185
+ "logits/rejected": -2.079465389251709,
186
+ "logps/chosen": -331.4808349609375,
187
+ "logps/rejected": -322.7523193359375,
188
+ "loss": 0.603,
189
+ "rewards/accuracies": 0.737500011920929,
190
+ "rewards/chosen": -0.47515854239463806,
191
+ "rewards/margins": 0.4070638120174408,
192
+ "rewards/rejected": -0.8822224736213684,
193
+ "step": 110
194
+ },
195
+ {
196
+ "epoch": 0.25,
197
+ "learning_rate": 4.660472094042121e-06,
198
+ "logits/chosen": -1.9152177572250366,
199
+ "logits/rejected": -1.9233732223510742,
200
+ "logps/chosen": -306.1752014160156,
201
+ "logps/rejected": -293.25958251953125,
202
+ "loss": 0.5842,
203
+ "rewards/accuracies": 0.737500011920929,
204
+ "rewards/chosen": -0.44953522086143494,
205
+ "rewards/margins": 0.3475509285926819,
206
+ "rewards/rejected": -0.7970861196517944,
207
+ "step": 120
208
+ },
209
+ {
210
+ "epoch": 0.27,
211
+ "learning_rate": 4.5626458262912745e-06,
212
+ "logits/chosen": -1.70361328125,
213
+ "logits/rejected": -1.6728603839874268,
214
+ "logps/chosen": -318.77362060546875,
215
+ "logps/rejected": -358.8970642089844,
216
+ "loss": 0.5741,
217
+ "rewards/accuracies": 0.7124999761581421,
218
+ "rewards/chosen": -0.5537757873535156,
219
+ "rewards/margins": 0.4414924681186676,
220
+ "rewards/rejected": -0.9952683448791504,
221
+ "step": 130
222
+ },
223
+ {
224
+ "epoch": 0.29,
225
+ "learning_rate": 4.453763107901676e-06,
226
+ "logits/chosen": -1.389970302581787,
227
+ "logits/rejected": -1.3908889293670654,
228
+ "logps/chosen": -365.573974609375,
229
+ "logps/rejected": -389.08026123046875,
230
+ "loss": 0.5673,
231
+ "rewards/accuracies": 0.7562500238418579,
232
+ "rewards/chosen": -0.8941513895988464,
233
+ "rewards/margins": 0.5348166823387146,
234
+ "rewards/rejected": -1.4289681911468506,
235
+ "step": 140
236
+ },
237
+ {
238
+ "epoch": 0.31,
239
+ "learning_rate": 4.33440758555951e-06,
240
+ "logits/chosen": -1.2829688787460327,
241
+ "logits/rejected": -1.1666593551635742,
242
+ "logps/chosen": -331.8448181152344,
243
+ "logps/rejected": -347.62164306640625,
244
+ "loss": 0.5587,
245
+ "rewards/accuracies": 0.7250000238418579,
246
+ "rewards/chosen": -0.8844560384750366,
247
+ "rewards/margins": 0.41986608505249023,
248
+ "rewards/rejected": -1.304322361946106,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.33,
253
+ "learning_rate": 4.205219043576955e-06,
254
+ "logits/chosen": -0.9949747323989868,
255
+ "logits/rejected": -0.5801770687103271,
256
+ "logps/chosen": -321.4789123535156,
257
+ "logps/rejected": -394.52911376953125,
258
+ "loss": 0.5409,
259
+ "rewards/accuracies": 0.75,
260
+ "rewards/chosen": -0.5671305060386658,
261
+ "rewards/margins": 0.7361670732498169,
262
+ "rewards/rejected": -1.3032976388931274,
263
+ "step": 160
264
+ },
265
+ {
266
+ "epoch": 0.36,
267
+ "learning_rate": 4.066889974440757e-06,
268
+ "logits/chosen": -0.5876813530921936,
269
+ "logits/rejected": -0.3649616539478302,
270
+ "logps/chosen": -293.5026550292969,
271
+ "logps/rejected": -311.59423828125,
272
+ "loss": 0.5916,
273
+ "rewards/accuracies": 0.65625,
274
+ "rewards/chosen": -0.5926370024681091,
275
+ "rewards/margins": 0.5672849416732788,
276
+ "rewards/rejected": -1.1599220037460327,
277
+ "step": 170
278
+ },
279
+ {
280
+ "epoch": 0.38,
281
+ "learning_rate": 3.92016186682789e-06,
282
+ "logits/chosen": -0.8265250325202942,
283
+ "logits/rejected": -0.7326392531394958,
284
+ "logps/chosen": -324.5079650878906,
285
+ "logps/rejected": -388.0706481933594,
286
+ "loss": 0.5533,
287
+ "rewards/accuracies": 0.6875,
288
+ "rewards/chosen": -0.7355499863624573,
289
+ "rewards/margins": 0.6323290467262268,
290
+ "rewards/rejected": -1.3678789138793945,
291
+ "step": 180
292
+ },
293
+ {
294
+ "epoch": 0.4,
295
+ "learning_rate": 3.7658212309857576e-06,
296
+ "logits/chosen": -0.7887569665908813,
297
+ "logits/rejected": -0.5450983643531799,
298
+ "logps/chosen": -341.79412841796875,
299
+ "logps/rejected": -386.400146484375,
300
+ "loss": 0.5368,
301
+ "rewards/accuracies": 0.731249988079071,
302
+ "rewards/chosen": -0.7941572666168213,
303
+ "rewards/margins": 0.5156592130661011,
304
+ "rewards/rejected": -1.309816598892212,
305
+ "step": 190
306
+ },
307
+ {
308
+ "epoch": 0.42,
309
+ "learning_rate": 3.604695382782159e-06,
310
+ "logits/chosen": -0.2513345181941986,
311
+ "logits/rejected": -0.26383644342422485,
312
+ "logps/chosen": -360.3321228027344,
313
+ "logps/rejected": -372.03326416015625,
314
+ "loss": 0.5302,
315
+ "rewards/accuracies": 0.699999988079071,
316
+ "rewards/chosen": -0.8325196504592896,
317
+ "rewards/margins": 0.6745045185089111,
318
+ "rewards/rejected": -1.5070240497589111,
319
+ "step": 200
320
+ },
321
+ {
322
+ "epoch": 0.42,
323
+ "eval_logits/chosen": 0.4000808000564575,
324
+ "eval_logits/rejected": 0.31158187985420227,
325
+ "eval_logps/chosen": -334.02044677734375,
326
+ "eval_logps/rejected": -407.42919921875,
327
+ "eval_loss": 0.54950350522995,
328
+ "eval_rewards/accuracies": 0.74609375,
329
+ "eval_rewards/chosen": -0.8757554292678833,
330
+ "eval_rewards/margins": 0.7229353189468384,
331
+ "eval_rewards/rejected": -1.5986907482147217,
332
+ "eval_runtime": 72.2579,
333
+ "eval_samples_per_second": 27.679,
334
+ "eval_steps_per_second": 0.443,
335
+ "step": 200
336
+ },
337
+ {
338
+ "epoch": 0.44,
339
+ "learning_rate": 3.437648009023905e-06,
340
+ "logits/chosen": -0.355979859828949,
341
+ "logits/rejected": -0.13378120958805084,
342
+ "logps/chosen": -354.21832275390625,
343
+ "logps/rejected": -387.26171875,
344
+ "loss": 0.5542,
345
+ "rewards/accuracies": 0.637499988079071,
346
+ "rewards/chosen": -0.7991288304328918,
347
+ "rewards/margins": 0.5489404201507568,
348
+ "rewards/rejected": -1.348069190979004,
349
+ "step": 210
350
+ },
351
+ {
352
+ "epoch": 0.46,
353
+ "learning_rate": 3.265574537815398e-06,
354
+ "logits/chosen": -0.2700692415237427,
355
+ "logits/rejected": -0.08166289329528809,
356
+ "logps/chosen": -290.8014831542969,
357
+ "logps/rejected": -332.3136901855469,
358
+ "loss": 0.569,
359
+ "rewards/accuracies": 0.668749988079071,
360
+ "rewards/chosen": -0.608197033405304,
361
+ "rewards/margins": 0.507323145866394,
362
+ "rewards/rejected": -1.1155202388763428,
363
+ "step": 220
364
+ },
365
+ {
366
+ "epoch": 0.48,
367
+ "learning_rate": 3.089397338773569e-06,
368
+ "logits/chosen": -0.06517831236124039,
369
+ "logits/rejected": 0.004061543848365545,
370
+ "logps/chosen": -317.51495361328125,
371
+ "logps/rejected": -371.9200439453125,
372
+ "loss": 0.5576,
373
+ "rewards/accuracies": 0.71875,
374
+ "rewards/chosen": -0.7464595437049866,
375
+ "rewards/margins": 0.5365883111953735,
376
+ "rewards/rejected": -1.2830479145050049,
377
+ "step": 230
378
+ },
379
+ {
380
+ "epoch": 0.5,
381
+ "learning_rate": 2.9100607788275547e-06,
382
+ "logits/chosen": -0.13817472755908966,
383
+ "logits/rejected": -0.04675758630037308,
384
+ "logps/chosen": -334.0376281738281,
385
+ "logps/rejected": -356.2500915527344,
386
+ "loss": 0.5592,
387
+ "rewards/accuracies": 0.6625000238418579,
388
+ "rewards/chosen": -0.908129096031189,
389
+ "rewards/margins": 0.4950867295265198,
390
+ "rewards/rejected": -1.403215765953064,
391
+ "step": 240
392
+ },
393
+ {
394
+ "epoch": 0.52,
395
+ "learning_rate": 2.72852616010567e-06,
396
+ "logits/chosen": -0.0858619436621666,
397
+ "logits/rejected": 0.23496215045452118,
398
+ "logps/chosen": -347.7973327636719,
399
+ "logps/rejected": -415.5159606933594,
400
+ "loss": 0.539,
401
+ "rewards/accuracies": 0.7437499761581421,
402
+ "rewards/chosen": -0.9283957481384277,
403
+ "rewards/margins": 0.7460072636604309,
404
+ "rewards/rejected": -1.6744029521942139,
405
+ "step": 250
406
+ },
407
+ {
408
+ "epoch": 0.54,
409
+ "learning_rate": 2.5457665670441937e-06,
410
+ "logits/chosen": 0.35799893736839294,
411
+ "logits/rejected": 0.7192636728286743,
412
+ "logps/chosen": -364.86138916015625,
413
+ "logps/rejected": -400.7781066894531,
414
+ "loss": 0.5156,
415
+ "rewards/accuracies": 0.7562500238418579,
416
+ "rewards/chosen": -1.0014389753341675,
417
+ "rewards/margins": 0.67198646068573,
418
+ "rewards/rejected": -1.673425316810608,
419
+ "step": 260
420
+ },
421
+ {
422
+ "epoch": 0.57,
423
+ "learning_rate": 2.3627616503391813e-06,
424
+ "logits/chosen": 0.7610759735107422,
425
+ "logits/rejected": 0.6359414458274841,
426
+ "logps/chosen": -394.61541748046875,
427
+ "logps/rejected": -451.08563232421875,
428
+ "loss": 0.5183,
429
+ "rewards/accuracies": 0.7124999761581421,
430
+ "rewards/chosen": -1.1308982372283936,
431
+ "rewards/margins": 0.7238563299179077,
432
+ "rewards/rejected": -1.8547546863555908,
433
+ "step": 270
434
+ },
435
+ {
436
+ "epoch": 0.59,
437
+ "learning_rate": 2.1804923757009885e-06,
438
+ "logits/chosen": 1.2029451131820679,
439
+ "logits/rejected": 1.3649919033050537,
440
+ "logps/chosen": -380.61932373046875,
441
+ "logps/rejected": -396.6190490722656,
442
+ "loss": 0.5485,
443
+ "rewards/accuracies": 0.6937500238418579,
444
+ "rewards/chosen": -1.2345253229141235,
445
+ "rewards/margins": 0.6180087327957153,
446
+ "rewards/rejected": -1.8525340557098389,
447
+ "step": 280
448
+ },
449
+ {
450
+ "epoch": 0.61,
451
+ "learning_rate": 1.9999357655598894e-06,
452
+ "logits/chosen": 0.7018269300460815,
453
+ "logits/rejected": 0.5022421479225159,
454
+ "logps/chosen": -329.9044189453125,
455
+ "logps/rejected": -380.2184143066406,
456
+ "loss": 0.5464,
457
+ "rewards/accuracies": 0.71875,
458
+ "rewards/chosen": -0.9144694209098816,
459
+ "rewards/margins": 0.6528812050819397,
460
+ "rewards/rejected": -1.5673506259918213,
461
+ "step": 290
462
+ },
463
+ {
464
+ "epoch": 0.63,
465
+ "learning_rate": 1.8220596619089576e-06,
466
+ "logits/chosen": 0.12925171852111816,
467
+ "logits/rejected": 0.4913701117038727,
468
+ "logps/chosen": -334.60748291015625,
469
+ "logps/rejected": -398.9501647949219,
470
+ "loss": 0.533,
471
+ "rewards/accuracies": 0.762499988079071,
472
+ "rewards/chosen": -0.7113243341445923,
473
+ "rewards/margins": 0.6202197670936584,
474
+ "rewards/rejected": -1.3315439224243164,
475
+ "step": 300
476
+ },
477
+ {
478
+ "epoch": 0.63,
479
+ "eval_logits/chosen": 0.6809147000312805,
480
+ "eval_logits/rejected": 0.5716233849525452,
481
+ "eval_logps/chosen": -327.86053466796875,
482
+ "eval_logps/rejected": -399.1313171386719,
483
+ "eval_loss": 0.5384255051612854,
484
+ "eval_rewards/accuracies": 0.76171875,
485
+ "eval_rewards/chosen": -0.8141559362411499,
486
+ "eval_rewards/margins": 0.7015555500984192,
487
+ "eval_rewards/rejected": -1.5157114267349243,
488
+ "eval_runtime": 72.3068,
489
+ "eval_samples_per_second": 27.66,
490
+ "eval_steps_per_second": 0.443,
491
+ "step": 300
492
+ },
493
+ {
494
+ "epoch": 0.65,
495
+ "learning_rate": 1.647817538357072e-06,
496
+ "logits/chosen": 0.2949855327606201,
497
+ "logits/rejected": 0.6500759124755859,
498
+ "logps/chosen": -364.088134765625,
499
+ "logps/rejected": -378.37872314453125,
500
+ "loss": 0.5199,
501
+ "rewards/accuracies": 0.737500011920929,
502
+ "rewards/chosen": -1.0496848821640015,
503
+ "rewards/margins": 0.6011860966682434,
504
+ "rewards/rejected": -1.6508712768554688,
505
+ "step": 310
506
+ },
507
+ {
508
+ "epoch": 0.67,
509
+ "learning_rate": 1.4781433892011132e-06,
510
+ "logits/chosen": 1.0437233448028564,
511
+ "logits/rejected": 0.9824169278144836,
512
+ "logps/chosen": -368.3329772949219,
513
+ "logps/rejected": -443.03125,
514
+ "loss": 0.5141,
515
+ "rewards/accuracies": 0.7437499761581421,
516
+ "rewards/chosen": -1.1911169290542603,
517
+ "rewards/margins": 0.9159032702445984,
518
+ "rewards/rejected": -2.107020139694214,
519
+ "step": 320
520
+ },
521
+ {
522
+ "epoch": 0.69,
523
+ "learning_rate": 1.3139467229135999e-06,
524
+ "logits/chosen": 0.43222084641456604,
525
+ "logits/rejected": 0.5403100252151489,
526
+ "logps/chosen": -406.03656005859375,
527
+ "logps/rejected": -415.65667724609375,
528
+ "loss": 0.5332,
529
+ "rewards/accuracies": 0.706250011920929,
530
+ "rewards/chosen": -1.136765718460083,
531
+ "rewards/margins": 0.6447176337242126,
532
+ "rewards/rejected": -1.7814832925796509,
533
+ "step": 330
534
+ },
535
+ {
536
+ "epoch": 0.71,
537
+ "learning_rate": 1.1561076868822756e-06,
538
+ "logits/chosen": 0.783063530921936,
539
+ "logits/rejected": 0.8185106515884399,
540
+ "logps/chosen": -352.68560791015625,
541
+ "logps/rejected": -389.525390625,
542
+ "loss": 0.5327,
543
+ "rewards/accuracies": 0.6875,
544
+ "rewards/chosen": -1.0253931283950806,
545
+ "rewards/margins": 0.48898547887802124,
546
+ "rewards/rejected": -1.514378547668457,
547
+ "step": 340
548
+ },
549
+ {
550
+ "epoch": 0.73,
551
+ "learning_rate": 1.0054723495346484e-06,
552
+ "logits/chosen": 0.8168965578079224,
553
+ "logits/rejected": 0.7234944105148315,
554
+ "logps/chosen": -320.1710510253906,
555
+ "logps/rejected": -381.6952209472656,
556
+ "loss": 0.5137,
557
+ "rewards/accuracies": 0.768750011920929,
558
+ "rewards/chosen": -1.005994200706482,
559
+ "rewards/margins": 0.8249969482421875,
560
+ "rewards/rejected": -1.8309911489486694,
561
+ "step": 350
562
+ },
563
+ {
564
+ "epoch": 0.75,
565
+ "learning_rate": 8.628481651367876e-07,
566
+ "logits/chosen": 0.688895046710968,
567
+ "logits/rejected": 1.0163129568099976,
568
+ "logps/chosen": -380.68157958984375,
569
+ "logps/rejected": -419.84234619140625,
570
+ "loss": 0.5228,
571
+ "rewards/accuracies": 0.6937500238418579,
572
+ "rewards/chosen": -1.1236674785614014,
573
+ "rewards/margins": 0.698872447013855,
574
+ "rewards/rejected": -1.8225399255752563,
575
+ "step": 360
576
+ },
577
+ {
578
+ "epoch": 0.77,
579
+ "learning_rate": 7.289996455765749e-07,
580
+ "logits/chosen": 0.6197929382324219,
581
+ "logits/rejected": 0.7506722807884216,
582
+ "logps/chosen": -420.45263671875,
583
+ "logps/rejected": -426.29107666015625,
584
+ "loss": 0.5353,
585
+ "rewards/accuracies": 0.7749999761581421,
586
+ "rewards/chosen": -1.1160409450531006,
587
+ "rewards/margins": 0.6991242170333862,
588
+ "rewards/rejected": -1.8151648044586182,
589
+ "step": 370
590
+ },
591
+ {
592
+ "epoch": 0.8,
593
+ "learning_rate": 6.046442623320145e-07,
594
+ "logits/chosen": 0.5390521287918091,
595
+ "logits/rejected": 0.5552736520767212,
596
+ "logps/chosen": -365.60577392578125,
597
+ "logps/rejected": -421.53564453125,
598
+ "loss": 0.5076,
599
+ "rewards/accuracies": 0.737500011920929,
600
+ "rewards/chosen": -1.0088932514190674,
601
+ "rewards/margins": 0.6299774050712585,
602
+ "rewards/rejected": -1.6388708353042603,
603
+ "step": 380
604
+ },
605
+ {
606
+ "epoch": 0.82,
607
+ "learning_rate": 4.904486005914027e-07,
608
+ "logits/chosen": 0.7070841789245605,
609
+ "logits/rejected": 0.8798855543136597,
610
+ "logps/chosen": -369.456787109375,
611
+ "logps/rejected": -406.3539123535156,
612
+ "loss": 0.5334,
613
+ "rewards/accuracies": 0.6812499761581421,
614
+ "rewards/chosen": -1.0536937713623047,
615
+ "rewards/margins": 0.5137845277786255,
616
+ "rewards/rejected": -1.5674784183502197,
617
+ "step": 390
618
+ },
619
+ {
620
+ "epoch": 0.84,
621
+ "learning_rate": 3.8702478614051353e-07,
622
+ "logits/chosen": 0.5031794905662537,
623
+ "logits/rejected": 0.47197189927101135,
624
+ "logps/chosen": -409.0977478027344,
625
+ "logps/rejected": -434.04693603515625,
626
+ "loss": 0.518,
627
+ "rewards/accuracies": 0.668749988079071,
628
+ "rewards/chosen": -1.1779779195785522,
629
+ "rewards/margins": 0.5962620973587036,
630
+ "rewards/rejected": -1.7742401361465454,
631
+ "step": 400
632
+ },
633
+ {
634
+ "epoch": 0.84,
635
+ "eval_logits/chosen": 1.195478081703186,
636
+ "eval_logits/rejected": 1.1053041219711304,
637
+ "eval_logps/chosen": -351.9891662597656,
638
+ "eval_logps/rejected": -432.54376220703125,
639
+ "eval_loss": 0.5275784730911255,
640
+ "eval_rewards/accuracies": 0.75,
641
+ "eval_rewards/chosen": -1.0554425716400146,
642
+ "eval_rewards/margins": 0.7943933010101318,
643
+ "eval_rewards/rejected": -1.849835753440857,
644
+ "eval_runtime": 72.3032,
645
+ "eval_samples_per_second": 27.661,
646
+ "eval_steps_per_second": 0.443,
647
+ "step": 400
648
+ },
649
+ {
650
+ "epoch": 0.86,
651
+ "learning_rate": 2.9492720416985004e-07,
652
+ "logits/chosen": 0.8437727093696594,
653
+ "logits/rejected": 0.800951361656189,
654
+ "logps/chosen": -389.89471435546875,
655
+ "logps/rejected": -402.897705078125,
656
+ "loss": 0.536,
657
+ "rewards/accuracies": 0.6937500238418579,
658
+ "rewards/chosen": -1.1922531127929688,
659
+ "rewards/margins": 0.5932148694992065,
660
+ "rewards/rejected": -1.7854681015014648,
661
+ "step": 410
662
+ },
663
+ {
664
+ "epoch": 0.88,
665
+ "learning_rate": 2.1464952759020857e-07,
666
+ "logits/chosen": 0.8531819581985474,
667
+ "logits/rejected": 0.7011052966117859,
668
+ "logps/chosen": -365.4083557128906,
669
+ "logps/rejected": -429.77392578125,
670
+ "loss": 0.5293,
671
+ "rewards/accuracies": 0.706250011920929,
672
+ "rewards/chosen": -1.1621034145355225,
673
+ "rewards/margins": 0.5474345684051514,
674
+ "rewards/rejected": -1.7095378637313843,
675
+ "step": 420
676
+ },
677
+ {
678
+ "epoch": 0.9,
679
+ "learning_rate": 1.4662207078575685e-07,
680
+ "logits/chosen": 0.9001744389533997,
681
+ "logits/rejected": 0.7595891356468201,
682
+ "logps/chosen": -376.25653076171875,
683
+ "logps/rejected": -414.7420959472656,
684
+ "loss": 0.5296,
685
+ "rewards/accuracies": 0.7124999761581421,
686
+ "rewards/chosen": -1.0551135540008545,
687
+ "rewards/margins": 0.7705513834953308,
688
+ "rewards/rejected": -1.825664758682251,
689
+ "step": 430
690
+ },
691
+ {
692
+ "epoch": 0.92,
693
+ "learning_rate": 9.120948298936422e-08,
694
+ "logits/chosen": 0.6783544421195984,
695
+ "logits/rejected": 0.9758931994438171,
696
+ "logps/chosen": -374.05902099609375,
697
+ "logps/rejected": -429.77825927734375,
698
+ "loss": 0.5249,
699
+ "rewards/accuracies": 0.7562500238418579,
700
+ "rewards/chosen": -1.049117088317871,
701
+ "rewards/margins": 0.7034090161323547,
702
+ "rewards/rejected": -1.752526044845581,
703
+ "step": 440
704
+ },
705
+ {
706
+ "epoch": 0.94,
707
+ "learning_rate": 4.870879364444109e-08,
708
+ "logits/chosen": 0.8478315472602844,
709
+ "logits/rejected": 0.7550174593925476,
710
+ "logps/chosen": -369.22027587890625,
711
+ "logps/rejected": -430.88873291015625,
712
+ "loss": 0.5406,
713
+ "rewards/accuracies": 0.71875,
714
+ "rewards/chosen": -1.088853120803833,
715
+ "rewards/margins": 0.6529593467712402,
716
+ "rewards/rejected": -1.7418124675750732,
717
+ "step": 450
718
+ },
719
+ {
720
+ "epoch": 0.96,
721
+ "learning_rate": 1.93478202307823e-08,
722
+ "logits/chosen": 0.6956031918525696,
723
+ "logits/rejected": 0.911687970161438,
724
+ "logps/chosen": -368.9820251464844,
725
+ "logps/rejected": -417.018310546875,
726
+ "loss": 0.516,
727
+ "rewards/accuracies": 0.78125,
728
+ "rewards/chosen": -1.0993984937667847,
729
+ "rewards/margins": 0.6686335206031799,
730
+ "rewards/rejected": -1.7680320739746094,
731
+ "step": 460
732
+ },
733
+ {
734
+ "epoch": 0.98,
735
+ "learning_rate": 3.283947088983663e-09,
736
+ "logits/chosen": 0.7104119658470154,
737
+ "logits/rejected": 0.7404820322990417,
738
+ "logps/chosen": -359.8403625488281,
739
+ "logps/rejected": -435.29803466796875,
740
+ "loss": 0.509,
741
+ "rewards/accuracies": 0.8125,
742
+ "rewards/chosen": -1.0412371158599854,
743
+ "rewards/margins": 0.8249354362487793,
744
+ "rewards/rejected": -1.8661725521087646,
745
+ "step": 470
746
+ },
747
+ {
748
+ "epoch": 1.0,
749
+ "step": 477,
750
+ "total_flos": 0.0,
751
+ "train_loss": 0.5624207920498319,
752
+ "train_runtime": 4926.1363,
753
+ "train_samples_per_second": 12.41,
754
+ "train_steps_per_second": 0.097
755
+ }
756
+ ],
757
+ "logging_steps": 10,
758
+ "max_steps": 477,
759
+ "num_input_tokens_seen": 0,
760
+ "num_train_epochs": 1,
761
+ "save_steps": 100,
762
+ "total_flos": 0.0,
763
+ "train_batch_size": 4,
764
+ "trial_name": null,
765
+ "trial_params": null
766
+ }