Model save
Browse files- README.md +3 -3
- all_results.json +7 -6
- config.json +1 -1
- generation_config.json +1 -1
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Jun18_02-32-34_n136-082-130/events.out.tfevents.1718649430.n136-082-130.4153555.0 +2 -2
- tokenizer.json +1 -0
- train_results.json +7 -6
- trainer_state.json +245 -233
- training_args.bin +2 -2
README.md
CHANGED
@@ -32,7 +32,7 @@ More information needed
|
|
32 |
### Training hyperparameters
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
-
- learning_rate:
|
36 |
- train_batch_size: 8
|
37 |
- eval_batch_size: 8
|
38 |
- seed: 42
|
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
-
- Transformers 4.
|
56 |
- Pytorch 2.1.2+cu118
|
57 |
- Datasets 2.16.1
|
58 |
-
- Tokenizers 0.
|
|
|
32 |
### Training hyperparameters
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
+
- learning_rate: 1e-07
|
36 |
- train_batch_size: 8
|
37 |
- eval_batch_size: 8
|
38 |
- seed: 42
|
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
+
- Transformers 4.41.1
|
56 |
- Pytorch 2.1.2+cu118
|
57 |
- Datasets 2.16.1
|
58 |
+
- Tokenizers 0.19.1
|
all_results.json
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
|
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 0.9972602739726028,
|
3 |
+
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.41821049595927146,
|
5 |
+
"train_runtime": 5820.0845,
|
6 |
+
"train_samples": 46672,
|
7 |
+
"train_samples_per_second": 8.019,
|
8 |
+
"train_steps_per_second": 0.031
|
9 |
}
|
config.json
CHANGED
@@ -20,7 +20,7 @@
|
|
20 |
"sliding_window": 4096,
|
21 |
"tie_word_embeddings": false,
|
22 |
"torch_dtype": "bfloat16",
|
23 |
-
"transformers_version": "4.
|
24 |
"use_cache": false,
|
25 |
"vocab_size": 32000
|
26 |
}
|
|
|
20 |
"sliding_window": 4096,
|
21 |
"tie_word_embeddings": false,
|
22 |
"torch_dtype": "bfloat16",
|
23 |
+
"transformers_version": "4.41.1",
|
24 |
"use_cache": false,
|
25 |
"vocab_size": 32000
|
26 |
}
|
generation_config.json
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 2,
|
5 |
-
"transformers_version": "4.
|
6 |
}
|
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.41.1"
|
6 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7290f1e2c26695e1d8c538d9fe41285a5e52f43708406dcae0878541a4f6d105
|
3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55ca4a716c63ab1b6533bd8ba1f04ef1b977d004fe97544fdad8b7fcb3020a04
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c07c8434b14ba8d74fb0863d0c4cafada3c494cf0444a8a71edfb894b7e87540
|
3 |
size 4540516344
|
runs/Jun18_02-32-34_n136-082-130/events.out.tfevents.1718649430.n136-082-130.4153555.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5836659d6ac9640c5792995c106c1fd759200b2ff12676c12d8bd4326a2db255
|
3 |
+
size 18386
|
tokenizer.json
CHANGED
@@ -134,6 +134,7 @@
|
|
134 |
"end_of_word_suffix": null,
|
135 |
"fuse_unk": true,
|
136 |
"byte_fallback": true,
|
|
|
137 |
"vocab": {
|
138 |
"<unk>": 0,
|
139 |
"<s>": 1,
|
|
|
134 |
"end_of_word_suffix": null,
|
135 |
"fuse_unk": true,
|
136 |
"byte_fallback": true,
|
137 |
+
"ignore_merges": false,
|
138 |
"vocab": {
|
139 |
"<unk>": 0,
|
140 |
"<s>": 1,
|
train_results.json
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
|
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 0.9972602739726028,
|
3 |
+
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.41821049595927146,
|
5 |
+
"train_runtime": 5820.0845,
|
6 |
+
"train_samples": 46672,
|
7 |
+
"train_samples_per_second": 8.019,
|
8 |
+
"train_steps_per_second": 0.031
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"logits/chosen": -0.
|
16 |
-
"logits/rejected": -0.
|
17 |
-
"logps/chosen": -
|
18 |
-
"logps/rejected": -
|
19 |
-
"loss": 0.
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
@@ -24,290 +24,302 @@
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
-
"epoch": 0.
|
28 |
-
"grad_norm":
|
29 |
-
"learning_rate":
|
30 |
-
"logits/chosen": -0.
|
31 |
-
"logits/rejected": -0.
|
32 |
-
"logps/chosen": -
|
33 |
-
"logps/rejected": -
|
34 |
-
"loss": 0.
|
35 |
-
"rewards/accuracies": 0.
|
36 |
-
"rewards/chosen": 0.
|
37 |
-
"rewards/margins": 0.
|
38 |
-
"rewards/rejected": 0.
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
-
"epoch": 0.
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate":
|
45 |
-
"logits/chosen": -0.
|
46 |
-
"logits/rejected": -0.
|
47 |
-
"logps/chosen": -
|
48 |
-
"logps/rejected": -
|
49 |
-
"loss": 0.
|
50 |
-
"rewards/accuracies": 0.
|
51 |
-
"rewards/chosen": 0.
|
52 |
-
"rewards/margins": 0.
|
53 |
-
"rewards/rejected": 0.
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
-
"epoch": 0.
|
58 |
-
"grad_norm":
|
59 |
-
"learning_rate":
|
60 |
-
"logits/chosen": -0.
|
61 |
-
"logits/rejected": -0.
|
62 |
-
"logps/chosen": -
|
63 |
-
"logps/rejected": -
|
64 |
-
"loss": 0.
|
65 |
-
"rewards/accuracies": 0.
|
66 |
-
"rewards/chosen": 0.
|
67 |
-
"rewards/margins": 0.
|
68 |
-
"rewards/rejected": -0.
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
-
"epoch": 0.
|
73 |
-
"grad_norm":
|
74 |
-
"learning_rate":
|
75 |
-
"logits/chosen": -0.
|
76 |
-
"logits/rejected": -0.
|
77 |
-
"logps/chosen": -
|
78 |
-
"logps/rejected": -
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies": 0.
|
81 |
-
"rewards/chosen":
|
82 |
-
"rewards/margins":
|
83 |
-
"rewards/rejected": -0.
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
-
"epoch": 0.
|
88 |
-
"grad_norm":
|
89 |
-
"learning_rate":
|
90 |
-
"logits/chosen": -0.
|
91 |
-
"logits/rejected": -0.
|
92 |
-
"logps/chosen": -
|
93 |
-
"logps/rejected": -
|
94 |
-
"loss": 0.
|
95 |
-
"rewards/accuracies": 0.
|
96 |
-
"rewards/chosen":
|
97 |
-
"rewards/margins":
|
98 |
-
"rewards/rejected":
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
-
"epoch": 0.
|
103 |
-
"grad_norm":
|
104 |
-
"learning_rate":
|
105 |
-
"logits/chosen": -0.
|
106 |
-
"logits/rejected": -0.
|
107 |
-
"logps/chosen": -
|
108 |
-
"logps/rejected": -
|
109 |
-
"loss": 0.
|
110 |
-
"rewards/accuracies": 0.
|
111 |
-
"rewards/chosen":
|
112 |
-
"rewards/margins":
|
113 |
-
"rewards/rejected": -0.
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
-
"epoch": 0.
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate":
|
120 |
-
"logits/chosen": -
|
121 |
-
"logits/rejected": -0.
|
122 |
-
"logps/chosen": -
|
123 |
-
"logps/rejected": -
|
124 |
-
"loss": 0.
|
125 |
-
"rewards/accuracies": 0.
|
126 |
-
"rewards/chosen":
|
127 |
-
"rewards/margins":
|
128 |
-
"rewards/rejected": 0.
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
-
"epoch": 0.
|
133 |
-
"grad_norm":
|
134 |
-
"learning_rate":
|
135 |
-
"logits/chosen": -0.
|
136 |
-
"logits/rejected": -0.
|
137 |
-
"logps/chosen": -
|
138 |
-
"logps/rejected": -
|
139 |
-
"loss": 0.
|
140 |
-
"rewards/accuracies": 0.
|
141 |
-
"rewards/chosen":
|
142 |
-
"rewards/margins":
|
143 |
-
"rewards/rejected": 0.
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
-
"epoch": 0.
|
148 |
-
"grad_norm":
|
149 |
-
"learning_rate":
|
150 |
-
"logits/chosen": -
|
151 |
-
"logits/rejected": -0.
|
152 |
-
"logps/chosen": -
|
153 |
-
"logps/rejected": -
|
154 |
-
"loss": 0.
|
155 |
-
"rewards/accuracies": 0.
|
156 |
-
"rewards/chosen": 1.
|
157 |
-
"rewards/margins":
|
158 |
-
"rewards/rejected": 0.
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
-
"epoch": 0.
|
163 |
-
"grad_norm":
|
164 |
-
"learning_rate":
|
165 |
-
"logits/chosen": -0.
|
166 |
-
"logits/rejected": -0.
|
167 |
-
"logps/chosen": -
|
168 |
-
"logps/rejected": -
|
169 |
-
"loss": 0.
|
170 |
-
"rewards/accuracies": 0.
|
171 |
-
"rewards/chosen": 1.
|
172 |
-
"rewards/margins": 1.
|
173 |
-
"rewards/rejected": 0.
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
-
"epoch": 0.
|
178 |
-
"grad_norm":
|
179 |
-
"learning_rate":
|
180 |
-
"logits/chosen": -0.
|
181 |
-
"logits/rejected": -0.
|
182 |
-
"logps/chosen": -
|
183 |
-
"logps/rejected": -
|
184 |
-
"loss": 0.
|
185 |
-
"rewards/accuracies": 0.
|
186 |
-
"rewards/chosen": 1.
|
187 |
-
"rewards/margins": 1.
|
188 |
-
"rewards/rejected": -0.
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
-
"epoch": 0.
|
193 |
-
"grad_norm":
|
194 |
-
"learning_rate":
|
195 |
-
"logits/chosen": -0.
|
196 |
-
"logits/rejected": -0.
|
197 |
-
"logps/chosen": -
|
198 |
-
"logps/rejected": -
|
199 |
-
"loss": 0.
|
200 |
-
"rewards/accuracies": 0.
|
201 |
-
"rewards/chosen": 1.
|
202 |
-
"rewards/margins":
|
203 |
-
"rewards/rejected": 0.
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
-
"epoch": 0.
|
208 |
-
"grad_norm":
|
209 |
-
"learning_rate":
|
210 |
-
"logits/chosen": -0.
|
211 |
-
"logits/rejected": -0.
|
212 |
-
"logps/chosen": -
|
213 |
-
"logps/rejected": -
|
214 |
-
"loss": 0.
|
215 |
-
"rewards/accuracies": 0.
|
216 |
-
"rewards/chosen": 1.
|
217 |
-
"rewards/margins": 1.
|
218 |
-
"rewards/rejected": 0.
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
-
"epoch": 0.
|
223 |
-
"grad_norm":
|
224 |
-
"learning_rate":
|
225 |
-
"logits/chosen": -
|
226 |
-
"logits/rejected": -0.
|
227 |
-
"logps/chosen": -
|
228 |
-
"logps/rejected": -
|
229 |
-
"loss": 0.
|
230 |
-
"rewards/accuracies": 0.
|
231 |
-
"rewards/chosen": 1.
|
232 |
-
"rewards/margins":
|
233 |
-
"rewards/rejected": 0.
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
-
"epoch": 0.
|
238 |
-
"grad_norm":
|
239 |
-
"learning_rate":
|
240 |
-
"logits/chosen": -0.
|
241 |
-
"logits/rejected": -0.
|
242 |
-
"logps/chosen": -
|
243 |
-
"logps/rejected": -
|
244 |
-
"loss": 0.
|
245 |
-
"rewards/accuracies": 0.
|
246 |
-
"rewards/chosen": 1.
|
247 |
-
"rewards/margins": 1.
|
248 |
-
"rewards/rejected": -0.
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
-
"epoch": 0.
|
253 |
-
"grad_norm":
|
254 |
-
"learning_rate":
|
255 |
-
"logits/chosen": -0.
|
256 |
-
"logits/rejected": -0.
|
257 |
-
"logps/chosen": -
|
258 |
-
"logps/rejected": -
|
259 |
-
"loss": 0.
|
260 |
-
"rewards/accuracies": 0.
|
261 |
-
"rewards/chosen": 1.
|
262 |
-
"rewards/margins":
|
263 |
-
"rewards/rejected": -0.
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
-
"epoch": 0.
|
268 |
-
"grad_norm":
|
269 |
-
"learning_rate":
|
270 |
-
"logits/chosen": -0.
|
271 |
-
"logits/rejected": -0.
|
272 |
-
"logps/chosen": -
|
273 |
-
"logps/rejected": -
|
274 |
-
"loss": 0.
|
275 |
-
"rewards/accuracies": 0.
|
276 |
-
"rewards/chosen":
|
277 |
-
"rewards/margins":
|
278 |
-
"rewards/rejected": -0.
|
279 |
"step": 170
|
280 |
},
|
281 |
{
|
282 |
-
"epoch": 0.
|
283 |
-
"grad_norm":
|
284 |
-
"learning_rate":
|
285 |
-
"logits/chosen": -0.
|
286 |
-
"logits/rejected": -0.
|
287 |
-
"logps/chosen": -
|
288 |
-
"logps/rejected": -
|
289 |
-
"loss": 0.
|
290 |
-
"rewards/accuracies": 0.
|
291 |
-
"rewards/chosen": 1.
|
292 |
-
"rewards/margins":
|
293 |
-
"rewards/rejected": -0.
|
294 |
"step": 180
|
295 |
},
|
296 |
{
|
297 |
-
"epoch":
|
298 |
-
"step":
|
299 |
"total_flos": 0.0,
|
300 |
-
"train_loss": 0.
|
301 |
-
"train_runtime":
|
302 |
-
"train_samples_per_second": 8.
|
303 |
-
"train_steps_per_second": 0.
|
304 |
}
|
305 |
],
|
306 |
"logging_steps": 10,
|
307 |
-
"max_steps":
|
308 |
"num_input_tokens_seen": 0,
|
309 |
"num_train_epochs": 1,
|
310 |
"save_steps": 100,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
"total_flos": 0.0,
|
312 |
"train_batch_size": 8,
|
313 |
"trial_name": null,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9972602739726028,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 182,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.005479452054794521,
|
13 |
+
"grad_norm": 749.8344019634822,
|
14 |
+
"learning_rate": 5.263157894736841e-09,
|
15 |
+
"logits/chosen": -0.7030794620513916,
|
16 |
+
"logits/rejected": -0.3951629400253296,
|
17 |
+
"logps/chosen": -341.73382568359375,
|
18 |
+
"logps/rejected": -292.9862060546875,
|
19 |
+
"loss": 0.6931,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
+
"epoch": 0.0547945205479452,
|
28 |
+
"grad_norm": 712.5612854291265,
|
29 |
+
"learning_rate": 5.2631578947368416e-08,
|
30 |
+
"logits/chosen": -0.8543994426727295,
|
31 |
+
"logits/rejected": -0.5004442930221558,
|
32 |
+
"logps/chosen": -343.0475158691406,
|
33 |
+
"logps/rejected": -297.69866943359375,
|
34 |
+
"loss": 0.7047,
|
35 |
+
"rewards/accuracies": 0.4652777910232544,
|
36 |
+
"rewards/chosen": 0.028890050947666168,
|
37 |
+
"rewards/margins": 0.015952428802847862,
|
38 |
+
"rewards/rejected": 0.012937622144818306,
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
+
"epoch": 0.1095890410958904,
|
43 |
+
"grad_norm": 633.132107951183,
|
44 |
+
"learning_rate": 9.999071352056673e-08,
|
45 |
+
"logits/chosen": -0.8784568905830383,
|
46 |
+
"logits/rejected": -0.5061537027359009,
|
47 |
+
"logps/chosen": -333.73223876953125,
|
48 |
+
"logps/rejected": -288.03704833984375,
|
49 |
+
"loss": 0.6532,
|
50 |
+
"rewards/accuracies": 0.59375,
|
51 |
+
"rewards/chosen": -0.058163970708847046,
|
52 |
+
"rewards/margins": 0.1349145770072937,
|
53 |
+
"rewards/rejected": -0.19307854771614075,
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
+
"epoch": 0.1643835616438356,
|
58 |
+
"grad_norm": 498.73085461545014,
|
59 |
+
"learning_rate": 9.888050389939171e-08,
|
60 |
+
"logits/chosen": -0.9048949480056763,
|
61 |
+
"logits/rejected": -0.5001607537269592,
|
62 |
+
"logps/chosen": -331.8179626464844,
|
63 |
+
"logps/rejected": -290.59527587890625,
|
64 |
+
"loss": 0.5232,
|
65 |
+
"rewards/accuracies": 0.7593749761581421,
|
66 |
+
"rewards/chosen": 0.2456372082233429,
|
67 |
+
"rewards/margins": 0.7688120603561401,
|
68 |
+
"rewards/rejected": -0.5231748819351196,
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
+
"epoch": 0.2191780821917808,
|
73 |
+
"grad_norm": 468.38328488591065,
|
74 |
+
"learning_rate": 9.596014080776422e-08,
|
75 |
+
"logits/chosen": -0.8753167986869812,
|
76 |
+
"logits/rejected": -0.5111785531044006,
|
77 |
+
"logps/chosen": -335.57745361328125,
|
78 |
+
"logps/rejected": -306.9455261230469,
|
79 |
+
"loss": 0.4522,
|
80 |
+
"rewards/accuracies": 0.8218749761581421,
|
81 |
+
"rewards/chosen": 1.0787924528121948,
|
82 |
+
"rewards/margins": 1.3148835897445679,
|
83 |
+
"rewards/rejected": -0.23609113693237305,
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
+
"epoch": 0.273972602739726,
|
88 |
+
"grad_norm": 405.49075569087563,
|
89 |
+
"learning_rate": 9.133777176798013e-08,
|
90 |
+
"logits/chosen": -0.9362503886222839,
|
91 |
+
"logits/rejected": -0.493760883808136,
|
92 |
+
"logps/chosen": -329.0995178222656,
|
93 |
+
"logps/rejected": -294.7619323730469,
|
94 |
+
"loss": 0.4065,
|
95 |
+
"rewards/accuracies": 0.8374999761581421,
|
96 |
+
"rewards/chosen": 2.0607025623321533,
|
97 |
+
"rewards/margins": 1.7504669427871704,
|
98 |
+
"rewards/rejected": 0.3102358281612396,
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
+
"epoch": 0.3287671232876712,
|
103 |
+
"grad_norm": 531.0849294440985,
|
104 |
+
"learning_rate": 8.518457335743925e-08,
|
105 |
+
"logits/chosen": -0.8332949876785278,
|
106 |
+
"logits/rejected": -0.44049328565597534,
|
107 |
+
"logps/chosen": -328.05059814453125,
|
108 |
+
"logps/rejected": -295.0623474121094,
|
109 |
+
"loss": 0.397,
|
110 |
+
"rewards/accuracies": 0.824999988079071,
|
111 |
+
"rewards/chosen": 1.8471581935882568,
|
112 |
+
"rewards/margins": 1.8996388912200928,
|
113 |
+
"rewards/rejected": -0.05248071998357773,
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
+
"epoch": 0.3835616438356164,
|
118 |
+
"grad_norm": 410.01119928479034,
|
119 |
+
"learning_rate": 7.772841216033532e-08,
|
120 |
+
"logits/chosen": -1.053856611251831,
|
121 |
+
"logits/rejected": -0.6151344776153564,
|
122 |
+
"logps/chosen": -347.8240966796875,
|
123 |
+
"logps/rejected": -304.7304382324219,
|
124 |
+
"loss": 0.3992,
|
125 |
+
"rewards/accuracies": 0.793749988079071,
|
126 |
+
"rewards/chosen": 1.6472972631454468,
|
127 |
+
"rewards/margins": 1.7940982580184937,
|
128 |
+
"rewards/rejected": -0.14680106937885284,
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
+
"epoch": 0.4383561643835616,
|
133 |
+
"grad_norm": 422.6505304779972,
|
134 |
+
"learning_rate": 6.924540636266271e-08,
|
135 |
+
"logits/chosen": -0.9396562576293945,
|
136 |
+
"logits/rejected": -0.3743361532688141,
|
137 |
+
"logps/chosen": -341.07342529296875,
|
138 |
+
"logps/rejected": -294.79632568359375,
|
139 |
+
"loss": 0.3788,
|
140 |
+
"rewards/accuracies": 0.8374999761581421,
|
141 |
+
"rewards/chosen": 1.4693505764007568,
|
142 |
+
"rewards/margins": 1.931475043296814,
|
143 |
+
"rewards/rejected": -0.46212440729141235,
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
+
"epoch": 0.4931506849315068,
|
148 |
+
"grad_norm": 435.4943837818754,
|
149 |
+
"learning_rate": 6.004970048339225e-08,
|
150 |
+
"logits/chosen": -1.0359487533569336,
|
151 |
+
"logits/rejected": -0.6516568064689636,
|
152 |
+
"logps/chosen": -337.1309509277344,
|
153 |
+
"logps/rejected": -307.5055847167969,
|
154 |
+
"loss": 0.3792,
|
155 |
+
"rewards/accuracies": 0.8218749761581421,
|
156 |
+
"rewards/chosen": 1.8926198482513428,
|
157 |
+
"rewards/margins": 1.9282394647598267,
|
158 |
+
"rewards/rejected": -0.03561948612332344,
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
+
"epoch": 0.547945205479452,
|
163 |
+
"grad_norm": 424.3608116994891,
|
164 |
+
"learning_rate": 5.0481831906199036e-08,
|
165 |
+
"logits/chosen": -0.8561469316482544,
|
166 |
+
"logits/rejected": -0.5456556081771851,
|
167 |
+
"logps/chosen": -315.7440185546875,
|
168 |
+
"logps/rejected": -284.4901428222656,
|
169 |
+
"loss": 0.3607,
|
170 |
+
"rewards/accuracies": 0.796875,
|
171 |
+
"rewards/chosen": 1.5119119882583618,
|
172 |
+
"rewards/margins": 1.8389956951141357,
|
173 |
+
"rewards/rejected": -0.3270837962627411,
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
+
"epoch": 0.6027397260273972,
|
178 |
+
"grad_norm": 376.9480565248481,
|
179 |
+
"learning_rate": 4.0896120024874283e-08,
|
180 |
+
"logits/chosen": -0.7713192105293274,
|
181 |
+
"logits/rejected": -0.4743874967098236,
|
182 |
+
"logps/chosen": -332.10638427734375,
|
183 |
+
"logps/rejected": -302.2787780761719,
|
184 |
+
"loss": 0.3675,
|
185 |
+
"rewards/accuracies": 0.831250011920929,
|
186 |
+
"rewards/chosen": 1.8271310329437256,
|
187 |
+
"rewards/margins": 1.9777942895889282,
|
188 |
+
"rewards/rejected": -0.15066327154636383,
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
+
"epoch": 0.6575342465753424,
|
193 |
+
"grad_norm": 452.4360162863762,
|
194 |
+
"learning_rate": 3.1647545010335394e-08,
|
195 |
+
"logits/chosen": -0.8479810953140259,
|
196 |
+
"logits/rejected": -0.47955965995788574,
|
197 |
+
"logps/chosen": -345.54071044921875,
|
198 |
+
"logps/rejected": -308.65179443359375,
|
199 |
+
"loss": 0.3527,
|
200 |
+
"rewards/accuracies": 0.840624988079071,
|
201 |
+
"rewards/chosen": 1.8864860534667969,
|
202 |
+
"rewards/margins": 2.1840381622314453,
|
203 |
+
"rewards/rejected": -0.2975522577762604,
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
+
"epoch": 0.7123287671232876,
|
208 |
+
"grad_norm": 315.5021095438557,
|
209 |
+
"learning_rate": 2.3078602107593897e-08,
|
210 |
+
"logits/chosen": -0.8250066041946411,
|
211 |
+
"logits/rejected": -0.49061331152915955,
|
212 |
+
"logps/chosen": -325.8185119628906,
|
213 |
+
"logps/rejected": -298.6694030761719,
|
214 |
+
"loss": 0.3638,
|
215 |
+
"rewards/accuracies": 0.7906249761581421,
|
216 |
+
"rewards/chosen": 1.3761605024337769,
|
217 |
+
"rewards/margins": 1.7671406269073486,
|
218 |
+
"rewards/rejected": -0.3909802734851837,
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
+
"epoch": 0.7671232876712328,
|
223 |
+
"grad_norm": 420.91275678465917,
|
224 |
+
"learning_rate": 1.5506618277219406e-08,
|
225 |
+
"logits/chosen": -1.0089247226715088,
|
226 |
+
"logits/rejected": -0.5239464044570923,
|
227 |
+
"logps/chosen": -334.70111083984375,
|
228 |
+
"logps/rejected": -293.3878479003906,
|
229 |
+
"loss": 0.3607,
|
230 |
+
"rewards/accuracies": 0.8343750238418579,
|
231 |
+
"rewards/chosen": 1.8048830032348633,
|
232 |
+
"rewards/margins": 2.049487829208374,
|
233 |
+
"rewards/rejected": -0.2446049153804779,
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
+
"epoch": 0.821917808219178,
|
238 |
+
"grad_norm": 450.70186106616944,
|
239 |
+
"learning_rate": 9.212000874196951e-09,
|
240 |
+
"logits/chosen": -0.9020761251449585,
|
241 |
+
"logits/rejected": -0.5833565592765808,
|
242 |
+
"logps/chosen": -328.1164245605469,
|
243 |
+
"logps/rejected": -298.4231872558594,
|
244 |
+
"loss": 0.349,
|
245 |
+
"rewards/accuracies": 0.84375,
|
246 |
+
"rewards/chosen": 1.6165488958358765,
|
247 |
+
"rewards/margins": 1.8227107524871826,
|
248 |
+
"rewards/rejected": -0.20616202056407928,
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
+
"epoch": 0.8767123287671232,
|
253 |
+
"grad_norm": 399.4981855057766,
|
254 |
+
"learning_rate": 4.427853541662091e-09,
|
255 |
+
"logits/chosen": -0.9409669041633606,
|
256 |
+
"logits/rejected": -0.5335644483566284,
|
257 |
+
"logps/chosen": -337.31488037109375,
|
258 |
+
"logps/rejected": -302.06365966796875,
|
259 |
+
"loss": 0.3548,
|
260 |
+
"rewards/accuracies": 0.846875011920929,
|
261 |
+
"rewards/chosen": 1.994319200515747,
|
262 |
+
"rewards/margins": 2.289970636367798,
|
263 |
+
"rewards/rejected": -0.29565146565437317,
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
+
"epoch": 0.9315068493150684,
|
268 |
+
"grad_norm": 381.8223301695622,
|
269 |
+
"learning_rate": 1.3313438659999398e-09,
|
270 |
+
"logits/chosen": -0.6689538955688477,
|
271 |
+
"logits/rejected": -0.37201908230781555,
|
272 |
+
"logps/chosen": -323.4323425292969,
|
273 |
+
"logps/rejected": -292.47857666015625,
|
274 |
+
"loss": 0.3629,
|
275 |
+
"rewards/accuracies": 0.840624988079071,
|
276 |
+
"rewards/chosen": 2.0121166706085205,
|
277 |
+
"rewards/margins": 2.282526731491089,
|
278 |
+
"rewards/rejected": -0.2704099416732788,
|
279 |
"step": 170
|
280 |
},
|
281 |
{
|
282 |
+
"epoch": 0.9863013698630136,
|
283 |
+
"grad_norm": 380.7644210750603,
|
284 |
+
"learning_rate": 3.7142468185014095e-11,
|
285 |
+
"logits/chosen": -0.8619197010993958,
|
286 |
+
"logits/rejected": -0.5339924097061157,
|
287 |
+
"logps/chosen": -319.94158935546875,
|
288 |
+
"logps/rejected": -290.5792236328125,
|
289 |
+
"loss": 0.367,
|
290 |
+
"rewards/accuracies": 0.8500000238418579,
|
291 |
+
"rewards/chosen": 1.9236304759979248,
|
292 |
+
"rewards/margins": 2.2062630653381348,
|
293 |
+
"rewards/rejected": -0.2826324701309204,
|
294 |
"step": 180
|
295 |
},
|
296 |
{
|
297 |
+
"epoch": 0.9972602739726028,
|
298 |
+
"step": 182,
|
299 |
"total_flos": 0.0,
|
300 |
+
"train_loss": 0.41821049595927146,
|
301 |
+
"train_runtime": 5820.0845,
|
302 |
+
"train_samples_per_second": 8.019,
|
303 |
+
"train_steps_per_second": 0.031
|
304 |
}
|
305 |
],
|
306 |
"logging_steps": 10,
|
307 |
+
"max_steps": 182,
|
308 |
"num_input_tokens_seen": 0,
|
309 |
"num_train_epochs": 1,
|
310 |
"save_steps": 100,
|
311 |
+
"stateful_callbacks": {
|
312 |
+
"TrainerControl": {
|
313 |
+
"args": {
|
314 |
+
"should_epoch_stop": false,
|
315 |
+
"should_evaluate": false,
|
316 |
+
"should_log": false,
|
317 |
+
"should_save": true,
|
318 |
+
"should_training_stop": false
|
319 |
+
},
|
320 |
+
"attributes": {}
|
321 |
+
}
|
322 |
+
},
|
323 |
"total_flos": 0.0,
|
324 |
"train_batch_size": 8,
|
325 |
"trial_name": null,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91a455adcc1dd46693a6fa3188bc76c135951e0b0806108b876f1cac5c69def0
|
3 |
+
size 6520
|