Model save
Browse files- README.md +2 -2
- all_results.json +6 -5
- generation_config.json +1 -1
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Jun05_16-45-33_n136-082-130/events.out.tfevents.1717577207.n136-082-130.1671269.0 +2 -2
- train_results.json +6 -5
- trainer_state.json +231 -234
README.md
CHANGED
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
-
- Transformers 4.
|
56 |
- Pytorch 2.1.2+cu118
|
57 |
- Datasets 2.16.1
|
58 |
-
- Tokenizers 0.
|
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
+
- Transformers 4.41.1
|
56 |
- Pytorch 2.1.2+cu118
|
57 |
- Datasets 2.16.1
|
58 |
+
- Tokenizers 0.19.1
|
all_results.json
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
|
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.4268451908656529,
|
5 |
+
"train_runtime": 5571.7779,
|
6 |
+
"train_samples": 44755,
|
7 |
+
"train_samples_per_second": 8.032,
|
8 |
+
"train_steps_per_second": 0.031
|
9 |
}
|
generation_config.json
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 32000,
|
5 |
-
"transformers_version": "4.
|
6 |
}
|
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 32000,
|
5 |
+
"transformers_version": "4.41.1"
|
6 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943178720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee8452a398b257c4a92a8643d9b1dfe39769d290e7c90c76b86b619836911c45
|
3 |
size 4943178720
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee6a898b9636aed352abe6efb776fc9d203259465cf8a6ffff1063f02fa01257
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540532728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f22fcd73a88b87f23b169dac783932266e459f67992280e6211fdc500dbf6a1d
|
3 |
size 4540532728
|
runs/Jun05_16-45-33_n136-082-130/events.out.tfevents.1717577207.n136-082-130.1671269.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:677d4dc19859dd093eed3aeb0022a132de8f6ce1d85c343d2af84f7e925a454c
|
3 |
+
size 17729
|
train_results.json
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
|
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.4268451908656529,
|
5 |
+
"train_runtime": 5571.7779,
|
6 |
+
"train_samples": 44755,
|
7 |
+
"train_samples_per_second": 8.032,
|
8 |
+
"train_steps_per_second": 0.031
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate": 2.
|
15 |
-
"logits/chosen": -4.
|
16 |
-
"logits/rejected": -4.
|
17 |
-
"logps/chosen": -
|
18 |
-
"logps/rejected": -
|
19 |
-
"loss": 0.
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
@@ -24,290 +24,287 @@
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
-
"epoch": 0.
|
28 |
-
"grad_norm":
|
29 |
-
"learning_rate": 2.
|
30 |
-
"logits/chosen": -4.
|
31 |
-
"logits/rejected": -4.
|
32 |
-
"logps/chosen": -
|
33 |
-
"logps/rejected": -
|
34 |
-
"loss": 0.
|
35 |
"rewards/accuracies": 0.5034722089767456,
|
36 |
-
"rewards/chosen": 0.
|
37 |
-
"rewards/margins": 0.
|
38 |
-
"rewards/rejected": 0.
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
-
"epoch": 0.
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate": 4.
|
45 |
-
"logits/chosen": -4.
|
46 |
-
"logits/rejected": -4.
|
47 |
-
"logps/chosen": -
|
48 |
-
"logps/rejected": -
|
49 |
-
"loss": 0.
|
50 |
-
"rewards/accuracies": 0.
|
51 |
-
"rewards/chosen":
|
52 |
-
"rewards/margins":
|
53 |
-
"rewards/rejected":
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
-
"epoch": 0.
|
58 |
-
"grad_norm":
|
59 |
-
"learning_rate": 4.
|
60 |
-
"logits/chosen": -4.
|
61 |
-
"logits/rejected": -
|
62 |
-
"logps/chosen": -
|
63 |
-
"logps/rejected": -
|
64 |
-
"loss": 0.
|
65 |
-
"rewards/accuracies": 0.
|
66 |
-
"rewards/chosen":
|
67 |
-
"rewards/margins":
|
68 |
-
"rewards/rejected": -
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
-
"epoch": 0.
|
73 |
-
"grad_norm":
|
74 |
-
"learning_rate": 4.
|
75 |
-
"logits/chosen": -4.
|
76 |
-
"logits/rejected": -
|
77 |
-
"logps/chosen": -
|
78 |
-
"logps/rejected": -
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies": 0.
|
81 |
-
"rewards/chosen":
|
82 |
-
"rewards/margins":
|
83 |
-
"rewards/rejected": -
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
-
"epoch": 0.
|
88 |
-
"grad_norm":
|
89 |
-
"learning_rate": 4.
|
90 |
-
"logits/chosen": -
|
91 |
-
"logits/rejected": -
|
92 |
-
"logps/chosen": -
|
93 |
-
"logps/rejected": -
|
94 |
-
"loss": 0.
|
95 |
-
"rewards/accuracies": 0.
|
96 |
-
"rewards/chosen":
|
97 |
-
"rewards/margins":
|
98 |
-
"rewards/rejected": -
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
-
"epoch": 0.
|
103 |
-
"grad_norm":
|
104 |
-
"learning_rate": 4.
|
105 |
-
"logits/chosen": -
|
106 |
-
"logits/rejected": -
|
107 |
-
"logps/chosen": -
|
108 |
-
"logps/rejected": -
|
109 |
-
"loss": 0.
|
110 |
-
"rewards/accuracies": 0.
|
111 |
-
"rewards/chosen":
|
112 |
-
"rewards/margins":
|
113 |
-
"rewards/rejected": -2.
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
-
"epoch": 0.
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate": 3.
|
120 |
-
"logits/chosen": -
|
121 |
-
"logits/rejected": -
|
122 |
-
"logps/chosen": -
|
123 |
-
"logps/rejected": -
|
124 |
-
"loss": 0.
|
125 |
-
"rewards/accuracies": 0.
|
126 |
-
"rewards/chosen":
|
127 |
-
"rewards/margins":
|
128 |
-
"rewards/rejected": -3.
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
-
"epoch": 0.
|
133 |
-
"grad_norm":
|
134 |
-
"learning_rate": 3.
|
135 |
-
"logits/chosen": -
|
136 |
-
"logits/rejected": -
|
137 |
-
"logps/chosen": -
|
138 |
-
"logps/rejected": -
|
139 |
-
"loss": 0.
|
140 |
-
"rewards/accuracies": 0.
|
141 |
-
"rewards/chosen":
|
142 |
-
"rewards/margins":
|
143 |
-
"rewards/rejected": -
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
-
"epoch": 0.
|
148 |
-
"grad_norm":
|
149 |
-
"learning_rate":
|
150 |
-
"logits/chosen": -
|
151 |
-
"logits/rejected": -
|
152 |
-
"logps/chosen": -
|
153 |
-
"logps/rejected": -
|
154 |
-
"loss": 0.
|
155 |
-
"rewards/accuracies": 0.
|
156 |
-
"rewards/chosen":
|
157 |
-
"rewards/margins":
|
158 |
-
"rewards/rejected": -
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
-
"epoch": 0.
|
163 |
-
"grad_norm":
|
164 |
-
"learning_rate": 2.
|
165 |
-
"logits/chosen": -
|
166 |
-
"logits/rejected": -
|
167 |
-
"logps/chosen": -
|
168 |
-
"logps/rejected": -
|
169 |
-
"loss": 0.
|
170 |
-
"rewards/accuracies": 0.
|
171 |
-
"rewards/chosen":
|
172 |
-
"rewards/margins":
|
173 |
-
"rewards/rejected": -
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
-
"epoch": 0.
|
178 |
-
"grad_norm":
|
179 |
-
"learning_rate":
|
180 |
-
"logits/chosen": -
|
181 |
-
"logits/rejected": -
|
182 |
-
"logps/chosen": -
|
183 |
-
"logps/rejected": -
|
184 |
-
"loss": 0.
|
185 |
-
"rewards/accuracies": 0.
|
186 |
-
"rewards/chosen":
|
187 |
-
"rewards/margins":
|
188 |
-
"rewards/rejected": -
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
-
"epoch": 0.
|
193 |
-
"grad_norm":
|
194 |
-
"learning_rate": 1.
|
195 |
-
"logits/chosen": -
|
196 |
-
"logits/rejected": -
|
197 |
-
"logps/chosen": -
|
198 |
-
"logps/rejected": -
|
199 |
-
"loss": 0.
|
200 |
-
"rewards/accuracies": 0.
|
201 |
-
"rewards/chosen":
|
202 |
-
"rewards/margins":
|
203 |
-
"rewards/rejected": -
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
-
"epoch": 0.
|
208 |
-
"grad_norm":
|
209 |
-
"learning_rate":
|
210 |
-
"logits/chosen": -
|
211 |
-
"logits/rejected": -
|
212 |
-
"logps/chosen": -
|
213 |
-
"logps/rejected": -
|
214 |
-
"loss": 0.
|
215 |
-
"rewards/accuracies": 0.
|
216 |
-
"rewards/chosen":
|
217 |
-
"rewards/margins":
|
218 |
-
"rewards/rejected": -
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
-
"epoch": 0.
|
223 |
-
"grad_norm":
|
224 |
-
"learning_rate":
|
225 |
-
"logits/chosen": -
|
226 |
-
"logits/rejected": -
|
227 |
-
"logps/chosen": -
|
228 |
-
"logps/rejected": -
|
229 |
-
"loss": 0.
|
230 |
-
"rewards/accuracies": 0.
|
231 |
-
"rewards/chosen":
|
232 |
-
"rewards/margins":
|
233 |
-
"rewards/rejected": -
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
-
"epoch": 0.
|
238 |
-
"grad_norm":
|
239 |
-
"learning_rate":
|
240 |
-
"logits/chosen": -
|
241 |
-
"logits/rejected": -
|
242 |
-
"logps/chosen": -
|
243 |
-
"logps/rejected": -
|
244 |
-
"loss": 0.
|
245 |
-
"rewards/accuracies": 0.
|
246 |
-
"rewards/chosen":
|
247 |
-
"rewards/margins":
|
248 |
-
"rewards/rejected": -
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
-
"epoch": 0.
|
253 |
-
"grad_norm":
|
254 |
-
"learning_rate":
|
255 |
-
"logits/chosen": -
|
256 |
-
"logits/rejected": -
|
257 |
-
"logps/chosen": -
|
258 |
-
"logps/rejected": -
|
259 |
-
"loss": 0.
|
260 |
-
"rewards/accuracies": 0.
|
261 |
-
"rewards/chosen":
|
262 |
-
"rewards/margins":
|
263 |
-
"rewards/rejected": -
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
-
"epoch": 0.
|
268 |
-
"grad_norm":
|
269 |
-
"learning_rate": 1.
|
270 |
-
"logits/chosen": -
|
271 |
-
"logits/rejected": -
|
272 |
-
"logps/chosen": -
|
273 |
-
"logps/rejected": -
|
274 |
-
"loss": 0.
|
275 |
-
"rewards/accuracies": 0.
|
276 |
-
"rewards/chosen":
|
277 |
-
"rewards/margins":
|
278 |
-
"rewards/rejected": -
|
279 |
"step": 170
|
280 |
},
|
281 |
-
{
|
282 |
-
"epoch": 0.95,
|
283 |
-
"grad_norm": 29.32551345390984,
|
284 |
-
"learning_rate": 3.4498131616493565e-09,
|
285 |
-
"logits/chosen": -6.939836025238037,
|
286 |
-
"logits/rejected": -7.576680660247803,
|
287 |
-
"logps/chosen": -514.7128295898438,
|
288 |
-
"logps/rejected": -656.9924926757812,
|
289 |
-
"loss": 0.3518,
|
290 |
-
"rewards/accuracies": 0.8031250238418579,
|
291 |
-
"rewards/chosen": -2.932391881942749,
|
292 |
-
"rewards/margins": 1.7769733667373657,
|
293 |
-
"rewards/rejected": -4.709364891052246,
|
294 |
-
"step": 180
|
295 |
-
},
|
296 |
{
|
297 |
"epoch": 1.0,
|
298 |
-
"step":
|
299 |
"total_flos": 0.0,
|
300 |
-
"train_loss": 0.
|
301 |
-
"train_runtime":
|
302 |
-
"train_samples_per_second":
|
303 |
-
"train_steps_per_second": 0.
|
304 |
}
|
305 |
],
|
306 |
"logging_steps": 10,
|
307 |
-
"max_steps":
|
308 |
"num_input_tokens_seen": 0,
|
309 |
"num_train_epochs": 1,
|
310 |
"save_steps": 100,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
"total_flos": 0.0,
|
312 |
"train_batch_size": 8,
|
313 |
"trial_name": null,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 175,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.005714285714285714,
|
13 |
+
"grad_norm": 1251.908438964567,
|
14 |
+
"learning_rate": 2.7777777777777774e-08,
|
15 |
+
"logits/chosen": -4.099947929382324,
|
16 |
+
"logits/rejected": -4.528928756713867,
|
17 |
+
"logps/chosen": -297.4884033203125,
|
18 |
+
"logps/rejected": -227.07449340820312,
|
19 |
+
"loss": 0.6931,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
+
"epoch": 0.05714285714285714,
|
28 |
+
"grad_norm": 1007.5595895273253,
|
29 |
+
"learning_rate": 2.7777777777777776e-07,
|
30 |
+
"logits/chosen": -4.256350040435791,
|
31 |
+
"logits/rejected": -4.503963947296143,
|
32 |
+
"logps/chosen": -316.07769775390625,
|
33 |
+
"logps/rejected": -254.57467651367188,
|
34 |
+
"loss": 0.6613,
|
35 |
"rewards/accuracies": 0.5034722089767456,
|
36 |
+
"rewards/chosen": 0.2096220850944519,
|
37 |
+
"rewards/margins": 0.15642070770263672,
|
38 |
+
"rewards/rejected": 0.0532013401389122,
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
+
"epoch": 0.11428571428571428,
|
43 |
+
"grad_norm": 675.3841086149566,
|
44 |
+
"learning_rate": 4.997998237821233e-07,
|
45 |
+
"logits/chosen": -4.360010623931885,
|
46 |
+
"logits/rejected": -4.628513813018799,
|
47 |
+
"logps/chosen": -298.9122009277344,
|
48 |
+
"logps/rejected": -249.00918579101562,
|
49 |
+
"loss": 0.4212,
|
50 |
+
"rewards/accuracies": 0.800000011920929,
|
51 |
+
"rewards/chosen": 3.4100475311279297,
|
52 |
+
"rewards/margins": 2.592763900756836,
|
53 |
+
"rewards/rejected": 0.8172838091850281,
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
+
"epoch": 0.17142857142857143,
|
58 |
+
"grad_norm": 675.1555265980924,
|
59 |
+
"learning_rate": 4.928272579403969e-07,
|
60 |
+
"logits/chosen": -4.373316287994385,
|
61 |
+
"logits/rejected": -4.6160383224487305,
|
62 |
+
"logps/chosen": -303.8053894042969,
|
63 |
+
"logps/rejected": -266.44818115234375,
|
64 |
+
"loss": 0.4762,
|
65 |
+
"rewards/accuracies": 0.824999988079071,
|
66 |
+
"rewards/chosen": 4.110724449157715,
|
67 |
+
"rewards/margins": 6.506677150726318,
|
68 |
+
"rewards/rejected": -2.3959527015686035,
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
+
"epoch": 0.22857142857142856,
|
73 |
+
"grad_norm": 600.7834437052495,
|
74 |
+
"learning_rate": 4.7616414547743854e-07,
|
75 |
+
"logits/chosen": -4.35813045501709,
|
76 |
+
"logits/rejected": -4.55276346206665,
|
77 |
+
"logps/chosen": -289.32257080078125,
|
78 |
+
"logps/rejected": -250.25341796875,
|
79 |
+
"loss": 0.5303,
|
80 |
+
"rewards/accuracies": 0.8343750238418579,
|
81 |
+
"rewards/chosen": 3.9142494201660156,
|
82 |
+
"rewards/margins": 7.811418056488037,
|
83 |
+
"rewards/rejected": -3.897169589996338,
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
+
"epoch": 0.2857142857142857,
|
88 |
+
"grad_norm": 788.2365830395779,
|
89 |
+
"learning_rate": 4.5047546391491e-07,
|
90 |
+
"logits/chosen": -4.276906967163086,
|
91 |
+
"logits/rejected": -4.5039567947387695,
|
92 |
+
"logps/chosen": -297.9548034667969,
|
93 |
+
"logps/rejected": -260.8029479980469,
|
94 |
+
"loss": 0.4673,
|
95 |
+
"rewards/accuracies": 0.8531249761581421,
|
96 |
+
"rewards/chosen": 4.0600905418396,
|
97 |
+
"rewards/margins": 7.4909186363220215,
|
98 |
+
"rewards/rejected": -3.430828094482422,
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
+
"epoch": 0.34285714285714286,
|
103 |
+
"grad_norm": 715.3302112367288,
|
104 |
+
"learning_rate": 4.167863756189767e-07,
|
105 |
+
"logits/chosen": -4.322784900665283,
|
106 |
+
"logits/rejected": -4.564073085784912,
|
107 |
+
"logps/chosen": -293.1005554199219,
|
108 |
+
"logps/rejected": -254.21835327148438,
|
109 |
+
"loss": 0.4621,
|
110 |
+
"rewards/accuracies": 0.893750011920929,
|
111 |
+
"rewards/chosen": 5.134177207946777,
|
112 |
+
"rewards/margins": 7.875572204589844,
|
113 |
+
"rewards/rejected": -2.741394519805908,
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
+
"epoch": 0.4,
|
118 |
+
"grad_norm": 633.4373267135044,
|
119 |
+
"learning_rate": 3.764413164801049e-07,
|
120 |
+
"logits/chosen": -4.282719612121582,
|
121 |
+
"logits/rejected": -4.559357643127441,
|
122 |
+
"logps/chosen": -287.670166015625,
|
123 |
+
"logps/rejected": -240.59359741210938,
|
124 |
+
"loss": 0.3978,
|
125 |
+
"rewards/accuracies": 0.8812500238418579,
|
126 |
+
"rewards/chosen": 4.107884407043457,
|
127 |
+
"rewards/margins": 7.409787654876709,
|
128 |
+
"rewards/rejected": -3.301903247833252,
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
+
"epoch": 0.45714285714285713,
|
133 |
+
"grad_norm": 585.2050073161457,
|
134 |
+
"learning_rate": 3.3105034329273217e-07,
|
135 |
+
"logits/chosen": -4.1827239990234375,
|
136 |
+
"logits/rejected": -4.454409599304199,
|
137 |
+
"logps/chosen": -294.9931640625,
|
138 |
+
"logps/rejected": -253.04092407226562,
|
139 |
+
"loss": 0.4435,
|
140 |
+
"rewards/accuracies": 0.828125,
|
141 |
+
"rewards/chosen": 4.6706342697143555,
|
142 |
+
"rewards/margins": 7.3439764976501465,
|
143 |
+
"rewards/rejected": -2.67334246635437,
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
+
"epoch": 0.5142857142857142,
|
148 |
+
"grad_norm": 744.3685065355755,
|
149 |
+
"learning_rate": 2.8242488095860204e-07,
|
150 |
+
"logits/chosen": -4.200292110443115,
|
151 |
+
"logits/rejected": -4.432915210723877,
|
152 |
+
"logps/chosen": -289.46466064453125,
|
153 |
+
"logps/rejected": -249.84048461914062,
|
154 |
+
"loss": 0.4059,
|
155 |
+
"rewards/accuracies": 0.8656250238418579,
|
156 |
+
"rewards/chosen": 5.276065349578857,
|
157 |
+
"rewards/margins": 7.3948163986206055,
|
158 |
+
"rewards/rejected": -2.118751287460327,
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
+
"epoch": 0.5714285714285714,
|
163 |
+
"grad_norm": 671.6213883992457,
|
164 |
+
"learning_rate": 2.3250543366050071e-07,
|
165 |
+
"logits/chosen": -4.27265739440918,
|
166 |
+
"logits/rejected": -4.471877098083496,
|
167 |
+
"logps/chosen": -299.2139892578125,
|
168 |
+
"logps/rejected": -262.4172668457031,
|
169 |
+
"loss": 0.3587,
|
170 |
+
"rewards/accuracies": 0.8812500238418579,
|
171 |
+
"rewards/chosen": 5.20701265335083,
|
172 |
+
"rewards/margins": 7.200909614562988,
|
173 |
+
"rewards/rejected": -1.9938958883285522,
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
+
"epoch": 0.6285714285714286,
|
178 |
+
"grad_norm": 731.7704645558294,
|
179 |
+
"learning_rate": 1.8328414484826743e-07,
|
180 |
+
"logits/chosen": -4.198658466339111,
|
181 |
+
"logits/rejected": -4.5151848793029785,
|
182 |
+
"logps/chosen": -299.356689453125,
|
183 |
+
"logps/rejected": -248.8483428955078,
|
184 |
+
"loss": 0.3809,
|
185 |
+
"rewards/accuracies": 0.893750011920929,
|
186 |
+
"rewards/chosen": 5.6889519691467285,
|
187 |
+
"rewards/margins": 7.738437652587891,
|
188 |
+
"rewards/rejected": -2.049485683441162,
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
+
"epoch": 0.6857142857142857,
|
193 |
+
"grad_norm": 763.5659705732334,
|
194 |
+
"learning_rate": 1.3672529644823003e-07,
|
195 |
+
"logits/chosen": -4.348945140838623,
|
196 |
+
"logits/rejected": -4.604073524475098,
|
197 |
+
"logps/chosen": -269.67547607421875,
|
198 |
+
"logps/rejected": -229.8912811279297,
|
199 |
+
"loss": 0.3889,
|
200 |
+
"rewards/accuracies": 0.8656250238418579,
|
201 |
+
"rewards/chosen": 4.305537700653076,
|
202 |
+
"rewards/margins": 6.874751091003418,
|
203 |
+
"rewards/rejected": -2.569213390350342,
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
+
"epoch": 0.7428571428571429,
|
208 |
+
"grad_norm": 609.1110882142142,
|
209 |
+
"learning_rate": 9.468691994696146e-08,
|
210 |
+
"logits/chosen": -4.341274261474609,
|
211 |
+
"logits/rejected": -4.5989298820495605,
|
212 |
+
"logps/chosen": -275.35833740234375,
|
213 |
+
"logps/rejected": -245.8115692138672,
|
214 |
+
"loss": 0.3699,
|
215 |
+
"rewards/accuracies": 0.871874988079071,
|
216 |
+
"rewards/chosen": 4.084762096405029,
|
217 |
+
"rewards/margins": 6.567566871643066,
|
218 |
+
"rewards/rejected": -2.4828040599823,
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
+
"epoch": 0.8,
|
223 |
+
"grad_norm": 833.6810153426302,
|
224 |
+
"learning_rate": 5.884664762850466e-08,
|
225 |
+
"logits/chosen": -4.367494106292725,
|
226 |
+
"logits/rejected": -4.598031044006348,
|
227 |
+
"logps/chosen": -273.83099365234375,
|
228 |
+
"logps/rejected": -237.6991729736328,
|
229 |
+
"loss": 0.381,
|
230 |
+
"rewards/accuracies": 0.8656250238418579,
|
231 |
+
"rewards/chosen": 4.479451656341553,
|
232 |
+
"rewards/margins": 6.597804069519043,
|
233 |
+
"rewards/rejected": -2.118351697921753,
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
+
"epoch": 0.8571428571428571,
|
238 |
+
"grad_norm": 677.0337306190108,
|
239 |
+
"learning_rate": 3.063476303172388e-08,
|
240 |
+
"logits/chosen": -4.267864227294922,
|
241 |
+
"logits/rejected": -4.488691806793213,
|
242 |
+
"logps/chosen": -288.9342041015625,
|
243 |
+
"logps/rejected": -247.3463592529297,
|
244 |
+
"loss": 0.392,
|
245 |
+
"rewards/accuracies": 0.8843749761581421,
|
246 |
+
"rewards/chosen": 5.401379585266113,
|
247 |
+
"rewards/margins": 7.644896030426025,
|
248 |
+
"rewards/rejected": -2.2435173988342285,
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
+
"epoch": 0.9142857142857143,
|
253 |
+
"grad_norm": 601.5321276048043,
|
254 |
+
"learning_rate": 1.1177122393998372e-08,
|
255 |
+
"logits/chosen": -4.331192970275879,
|
256 |
+
"logits/rejected": -4.530573844909668,
|
257 |
+
"logps/chosen": -277.52020263671875,
|
258 |
+
"logps/rejected": -243.84323120117188,
|
259 |
+
"loss": 0.363,
|
260 |
+
"rewards/accuracies": 0.8687499761581421,
|
261 |
+
"rewards/chosen": 4.819538593292236,
|
262 |
+
"rewards/margins": 6.703360080718994,
|
263 |
+
"rewards/rejected": -1.8838220834732056,
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
+
"epoch": 0.9714285714285714,
|
268 |
+
"grad_norm": 771.9761619985129,
|
269 |
+
"learning_rate": 1.2502249244298879e-09,
|
270 |
+
"logits/chosen": -4.293517112731934,
|
271 |
+
"logits/rejected": -4.548079013824463,
|
272 |
+
"logps/chosen": -295.1907653808594,
|
273 |
+
"logps/rejected": -248.191650390625,
|
274 |
+
"loss": 0.3693,
|
275 |
+
"rewards/accuracies": 0.8843749761581421,
|
276 |
+
"rewards/chosen": 4.456015586853027,
|
277 |
+
"rewards/margins": 6.605706214904785,
|
278 |
+
"rewards/rejected": -2.1496901512145996,
|
279 |
"step": 170
|
280 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
{
|
282 |
"epoch": 1.0,
|
283 |
+
"step": 175,
|
284 |
"total_flos": 0.0,
|
285 |
+
"train_loss": 0.4268451908656529,
|
286 |
+
"train_runtime": 5571.7779,
|
287 |
+
"train_samples_per_second": 8.032,
|
288 |
+
"train_steps_per_second": 0.031
|
289 |
}
|
290 |
],
|
291 |
"logging_steps": 10,
|
292 |
+
"max_steps": 175,
|
293 |
"num_input_tokens_seen": 0,
|
294 |
"num_train_epochs": 1,
|
295 |
"save_steps": 100,
|
296 |
+
"stateful_callbacks": {
|
297 |
+
"TrainerControl": {
|
298 |
+
"args": {
|
299 |
+
"should_epoch_stop": false,
|
300 |
+
"should_evaluate": false,
|
301 |
+
"should_log": false,
|
302 |
+
"should_save": true,
|
303 |
+
"should_training_stop": false
|
304 |
+
},
|
305 |
+
"attributes": {}
|
306 |
+
}
|
307 |
+
},
|
308 |
"total_flos": 0.0,
|
309 |
"train_batch_size": 8,
|
310 |
"trial_name": null,
|