Model save
Browse files- README.md +3 -3
- all_results.json +6 -7
- generation_config.json +1 -1
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Jun07_11-44-37_n136-100-194/events.out.tfevents.1717732216.n136-100-194.1436753.0 +2 -2
- train_results.json +6 -7
- trainer_state.json +257 -209
README.md
CHANGED
@@ -32,7 +32,7 @@ More information needed
|
|
32 |
### Training hyperparameters
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
-
- learning_rate:
|
36 |
- train_batch_size: 8
|
37 |
- eval_batch_size: 8
|
38 |
- seed: 42
|
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
-
- Transformers 4.
|
56 |
- Pytorch 2.1.2+cu118
|
57 |
- Datasets 2.16.1
|
58 |
-
- Tokenizers 0.
|
|
|
32 |
### Training hyperparameters
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
+
- learning_rate: 5e-07
|
36 |
- train_batch_size: 8
|
37 |
- eval_batch_size: 8
|
38 |
- seed: 42
|
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
+
- Transformers 4.38.2
|
56 |
- Pytorch 2.1.2+cu118
|
57 |
- Datasets 2.16.1
|
58 |
+
- Tokenizers 0.15.2
|
all_results.json
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch": 0
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"train_steps_per_second": 0.03
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.9975380127246564,
|
4 |
+
"train_runtime": 5482.1546,
|
5 |
+
"train_samples": 49998,
|
6 |
+
"train_samples_per_second": 9.12,
|
7 |
+
"train_steps_per_second": 0.036
|
|
|
8 |
}
|
generation_config.json
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 32000,
|
5 |
-
"transformers_version": "4.
|
6 |
}
|
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 32000,
|
5 |
+
"transformers_version": "4.38.2"
|
6 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943178720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47640a0478f2bcb7154d26393dc9ccd914f431fd8d23c33ec72d35f3d2adafe7
|
3 |
size 4943178720
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52f69274e88b49d66a73ddb87507cd1bc38954b506db2290b371231064d4f84e
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540532728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd5168e074183b468a8f1504e1ee15cf17cb86551434001e60a29c90de0ac2d5
|
3 |
size 4540532728
|
runs/Jun07_11-44-37_n136-100-194/events.out.tfevents.1717732216.n136-100-194.1436753.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:533a2109507cae2a619e7676a1143322f60bc5df3ca65925183f69a6017d392a
|
3 |
+
size 18853
|
train_results.json
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch": 0
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"train_steps_per_second": 0.03
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.9975380127246564,
|
4 |
+
"train_runtime": 5482.1546,
|
5 |
+
"train_samples": 49998,
|
6 |
+
"train_samples_per_second": 9.12,
|
7 |
+
"train_steps_per_second": 0.036
|
|
|
8 |
}
|
trainer_state.json
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"logits/chosen": -
|
16 |
-
"logits/rejected": -
|
17 |
-
"logps/chosen": -
|
18 |
-
"logps/rejected": -
|
19 |
-
"loss": 0.
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
@@ -24,257 +24,305 @@
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
-
"epoch": 0.
|
28 |
-
"grad_norm":
|
29 |
-
"learning_rate":
|
30 |
-
"logits/chosen": -4.
|
31 |
-
"logits/rejected": -
|
32 |
-
"logps/chosen": -
|
33 |
-
"logps/rejected": -
|
34 |
-
"loss": 0.
|
35 |
-
"rewards/accuracies": 0.
|
36 |
-
"rewards/chosen":
|
37 |
-
"rewards/margins":
|
38 |
-
"rewards/rejected": 0.
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
-
"epoch": 0.
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate":
|
45 |
-
"logits/chosen": -4.
|
46 |
-
"logits/rejected": -
|
47 |
-
"logps/chosen": -
|
48 |
-
"logps/rejected": -
|
49 |
-
"loss": 0.
|
50 |
-
"rewards/accuracies": 0.
|
51 |
-
"rewards/chosen": 0.
|
52 |
-
"rewards/margins":
|
53 |
-
"rewards/rejected": 0.
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
-
"epoch": 0.
|
58 |
-
"grad_norm":
|
59 |
-
"learning_rate":
|
60 |
-
"logits/chosen": -4.
|
61 |
-
"logits/rejected": -
|
62 |
-
"logps/chosen": -
|
63 |
-
"logps/rejected": -
|
64 |
-
"loss": 0.
|
65 |
-
"rewards/accuracies": 0.
|
66 |
-
"rewards/chosen":
|
67 |
-
"rewards/margins":
|
68 |
-
"rewards/rejected":
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
-
"epoch": 0.
|
73 |
-
"grad_norm":
|
74 |
-
"learning_rate":
|
75 |
-
"logits/chosen": -4.
|
76 |
-
"logits/rejected": -
|
77 |
-
"logps/chosen": -
|
78 |
-
"logps/rejected": -
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies": 0.
|
81 |
-
"rewards/chosen":
|
82 |
-
"rewards/margins":
|
83 |
-
"rewards/rejected":
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
-
"epoch": 0.
|
88 |
-
"grad_norm":
|
89 |
-
"learning_rate":
|
90 |
-
"logits/chosen": -4.
|
91 |
-
"logits/rejected": -
|
92 |
-
"logps/chosen": -
|
93 |
-
"logps/rejected": -
|
94 |
-
"loss": 0.
|
95 |
-
"rewards/accuracies": 0.
|
96 |
-
"rewards/chosen":
|
97 |
-
"rewards/margins":
|
98 |
-
"rewards/rejected":
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
-
"epoch": 0.
|
103 |
-
"grad_norm":
|
104 |
-
"learning_rate":
|
105 |
-
"logits/chosen": -
|
106 |
-
"logits/rejected": -
|
107 |
-
"logps/chosen": -
|
108 |
-
"logps/rejected": -
|
109 |
-
"loss": 0.
|
110 |
-
"rewards/accuracies": 0.
|
111 |
-
"rewards/chosen":
|
112 |
-
"rewards/margins":
|
113 |
-
"rewards/rejected":
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
-
"epoch": 0.
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate":
|
120 |
-
"logits/chosen": -4.
|
121 |
-
"logits/rejected": -
|
122 |
-
"logps/chosen": -
|
123 |
-
"logps/rejected": -
|
124 |
-
"loss":
|
125 |
-
"rewards/accuracies": 0.
|
126 |
-
"rewards/chosen":
|
127 |
-
"rewards/margins":
|
128 |
-
"rewards/rejected":
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
-
"epoch": 0.
|
133 |
-
"grad_norm":
|
134 |
-
"learning_rate":
|
135 |
-
"logits/chosen": -
|
136 |
-
"logits/rejected": -
|
137 |
-
"logps/chosen": -
|
138 |
-
"logps/rejected": -
|
139 |
-
"loss":
|
140 |
-
"rewards/accuracies": 0.
|
141 |
-
"rewards/chosen":
|
142 |
-
"rewards/margins":
|
143 |
-
"rewards/rejected":
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
-
"epoch": 0.
|
148 |
-
"grad_norm":
|
149 |
-
"learning_rate":
|
150 |
-
"logits/chosen": -
|
151 |
-
"logits/rejected": -
|
152 |
-
"logps/chosen": -
|
153 |
-
"logps/rejected": -
|
154 |
-
"loss":
|
155 |
-
"rewards/accuracies": 0.
|
156 |
-
"rewards/chosen":
|
157 |
-
"rewards/margins":
|
158 |
-
"rewards/rejected":
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
-
"epoch": 0.
|
163 |
-
"grad_norm":
|
164 |
-
"learning_rate":
|
165 |
-
"logits/chosen": -
|
166 |
-
"logits/rejected": -
|
167 |
-
"logps/chosen": -
|
168 |
-
"logps/rejected": -
|
169 |
-
"loss":
|
170 |
-
"rewards/accuracies": 0.
|
171 |
-
"rewards/chosen":
|
172 |
-
"rewards/margins":
|
173 |
-
"rewards/rejected":
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
-
"epoch": 0.
|
178 |
-
"grad_norm":
|
179 |
-
"learning_rate": 2.
|
180 |
-
"logits/chosen": -
|
181 |
-
"logits/rejected": -
|
182 |
-
"logps/chosen": -
|
183 |
-
"logps/rejected": -
|
184 |
-
"loss":
|
185 |
-
"rewards/accuracies": 0.
|
186 |
-
"rewards/chosen":
|
187 |
-
"rewards/margins":
|
188 |
-
"rewards/rejected":
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
-
"epoch": 0.
|
193 |
-
"grad_norm":
|
194 |
-
"learning_rate": 1.
|
195 |
-
"logits/chosen": -4.
|
196 |
-
"logits/rejected": -
|
197 |
-
"logps/chosen": -
|
198 |
-
"logps/rejected": -
|
199 |
-
"loss":
|
200 |
-
"rewards/accuracies": 0.
|
201 |
-
"rewards/chosen":
|
202 |
-
"rewards/margins":
|
203 |
-
"rewards/rejected":
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
-
"epoch": 0.
|
208 |
-
"grad_norm":
|
209 |
-
"learning_rate":
|
210 |
-
"logits/chosen": -
|
211 |
-
"logits/rejected": -
|
212 |
-
"logps/chosen": -
|
213 |
-
"logps/rejected": -
|
214 |
-
"loss":
|
215 |
-
"rewards/accuracies": 0.
|
216 |
-
"rewards/chosen":
|
217 |
-
"rewards/margins":
|
218 |
-
"rewards/rejected":
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
-
"epoch": 0.
|
223 |
-
"grad_norm":
|
224 |
-
"learning_rate":
|
225 |
-
"logits/chosen": -
|
226 |
-
"logits/rejected": -
|
227 |
-
"logps/chosen": -
|
228 |
-
"logps/rejected": -
|
229 |
-
"loss":
|
230 |
-
"rewards/accuracies": 0.
|
231 |
-
"rewards/chosen":
|
232 |
-
"rewards/margins":
|
233 |
-
"rewards/rejected":
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
-
"epoch": 0.
|
238 |
-
"grad_norm":
|
239 |
-
"learning_rate":
|
240 |
-
"logits/chosen": -
|
241 |
-
"logits/rejected": -
|
242 |
-
"logps/chosen": -
|
243 |
-
"logps/rejected": -
|
244 |
-
"loss":
|
245 |
-
"rewards/accuracies": 0.
|
246 |
-
"rewards/chosen":
|
247 |
-
"rewards/margins":
|
248 |
-
"rewards/rejected":
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
-
"epoch": 0.
|
253 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
"total_flos": 0.0,
|
255 |
-
"train_loss": 0.
|
256 |
-
"train_runtime":
|
257 |
-
"train_samples_per_second":
|
258 |
-
"train_steps_per_second": 0.
|
259 |
}
|
260 |
],
|
261 |
"logging_steps": 10,
|
262 |
-
"max_steps":
|
263 |
"num_input_tokens_seen": 0,
|
264 |
"num_train_epochs": 1,
|
265 |
"save_steps": 100,
|
266 |
-
"stateful_callbacks": {
|
267 |
-
"TrainerControl": {
|
268 |
-
"args": {
|
269 |
-
"should_epoch_stop": false,
|
270 |
-
"should_evaluate": false,
|
271 |
-
"should_log": false,
|
272 |
-
"should_save": true,
|
273 |
-
"should_training_stop": false
|
274 |
-
},
|
275 |
-
"attributes": {}
|
276 |
-
}
|
277 |
-
},
|
278 |
"total_flos": 0.0,
|
279 |
"train_batch_size": 8,
|
280 |
"trial_name": null,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9974424552429667,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 195,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.01,
|
13 |
+
"grad_norm": 1310.024749740419,
|
14 |
+
"learning_rate": 2.5e-08,
|
15 |
+
"logits/chosen": -5.0504608154296875,
|
16 |
+
"logits/rejected": -5.35328483581543,
|
17 |
+
"logps/chosen": -242.7239990234375,
|
18 |
+
"logps/rejected": -185.90835571289062,
|
19 |
+
"loss": 0.6893,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
+
"epoch": 0.05,
|
28 |
+
"grad_norm": 1343.8700325036616,
|
29 |
+
"learning_rate": 2.5e-07,
|
30 |
+
"logits/chosen": -4.959235191345215,
|
31 |
+
"logits/rejected": -5.051504135131836,
|
32 |
+
"logps/chosen": -226.43630981445312,
|
33 |
+
"logps/rejected": -216.47547912597656,
|
34 |
+
"loss": 0.7205,
|
35 |
+
"rewards/accuracies": 0.4479166567325592,
|
36 |
+
"rewards/chosen": 0.07974544167518616,
|
37 |
+
"rewards/margins": 0.013408761471509933,
|
38 |
+
"rewards/rejected": 0.06633666902780533,
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
+
"epoch": 0.1,
|
43 |
+
"grad_norm": 1443.7667771719773,
|
44 |
+
"learning_rate": 5e-07,
|
45 |
+
"logits/chosen": -4.906929969787598,
|
46 |
+
"logits/rejected": -5.0118937492370605,
|
47 |
+
"logps/chosen": -240.65188598632812,
|
48 |
+
"logps/rejected": -220.84378051757812,
|
49 |
+
"loss": 0.6926,
|
50 |
+
"rewards/accuracies": 0.612500011920929,
|
51 |
+
"rewards/chosen": 0.7429171204566956,
|
52 |
+
"rewards/margins": 1.1278517246246338,
|
53 |
+
"rewards/rejected": -0.38493460416793823,
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
+
"epoch": 0.15,
|
58 |
+
"grad_norm": 1641.6770420153719,
|
59 |
+
"learning_rate": 4.959823971496574e-07,
|
60 |
+
"logits/chosen": -4.913812637329102,
|
61 |
+
"logits/rejected": -5.012935638427734,
|
62 |
+
"logps/chosen": -238.8269805908203,
|
63 |
+
"logps/rejected": -228.05404663085938,
|
64 |
+
"loss": 0.8116,
|
65 |
+
"rewards/accuracies": 0.6343749761581421,
|
66 |
+
"rewards/chosen": 1.8061437606811523,
|
67 |
+
"rewards/margins": 4.523256301879883,
|
68 |
+
"rewards/rejected": -2.7171127796173096,
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
+
"epoch": 0.2,
|
73 |
+
"grad_norm": 1382.4291689510926,
|
74 |
+
"learning_rate": 4.840587176599343e-07,
|
75 |
+
"logits/chosen": -4.964416980743408,
|
76 |
+
"logits/rejected": -5.0027852058410645,
|
77 |
+
"logps/chosen": -249.1742706298828,
|
78 |
+
"logps/rejected": -235.87576293945312,
|
79 |
+
"loss": 0.9983,
|
80 |
+
"rewards/accuracies": 0.5531250238418579,
|
81 |
+
"rewards/chosen": 1.3685696125030518,
|
82 |
+
"rewards/margins": 4.053561210632324,
|
83 |
+
"rewards/rejected": -2.6849913597106934,
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
+
"epoch": 0.26,
|
88 |
+
"grad_norm": 1428.1508779981239,
|
89 |
+
"learning_rate": 4.646121984004665e-07,
|
90 |
+
"logits/chosen": -4.990395545959473,
|
91 |
+
"logits/rejected": -5.134562015533447,
|
92 |
+
"logps/chosen": -251.7528076171875,
|
93 |
+
"logps/rejected": -226.17306518554688,
|
94 |
+
"loss": 0.9987,
|
95 |
+
"rewards/accuracies": 0.6468750238418579,
|
96 |
+
"rewards/chosen": 2.2698659896850586,
|
97 |
+
"rewards/margins": 5.616934299468994,
|
98 |
+
"rewards/rejected": -3.3470687866210938,
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
+
"epoch": 0.31,
|
103 |
+
"grad_norm": 1429.7364912941882,
|
104 |
+
"learning_rate": 4.3826786650090273e-07,
|
105 |
+
"logits/chosen": -5.023388385772705,
|
106 |
+
"logits/rejected": -5.144254684448242,
|
107 |
+
"logps/chosen": -250.6563720703125,
|
108 |
+
"logps/rejected": -241.12484741210938,
|
109 |
+
"loss": 0.993,
|
110 |
+
"rewards/accuracies": 0.5843750238418579,
|
111 |
+
"rewards/chosen": 1.217611312866211,
|
112 |
+
"rewards/margins": 6.1895647048950195,
|
113 |
+
"rewards/rejected": -4.97195291519165,
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
+
"epoch": 0.36,
|
118 |
+
"grad_norm": 1385.9054301583744,
|
119 |
+
"learning_rate": 4.058724504646834e-07,
|
120 |
+
"logits/chosen": -4.992190361022949,
|
121 |
+
"logits/rejected": -5.075345039367676,
|
122 |
+
"logps/chosen": -256.97406005859375,
|
123 |
+
"logps/rejected": -242.94003295898438,
|
124 |
+
"loss": 1.1539,
|
125 |
+
"rewards/accuracies": 0.606249988079071,
|
126 |
+
"rewards/chosen": 2.1734097003936768,
|
127 |
+
"rewards/margins": 5.453003883361816,
|
128 |
+
"rewards/rejected": -3.2795944213867188,
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
+
"epoch": 0.41,
|
133 |
+
"grad_norm": 1267.3737422156325,
|
134 |
+
"learning_rate": 3.6846716561824967e-07,
|
135 |
+
"logits/chosen": -5.066686630249023,
|
136 |
+
"logits/rejected": -5.165375709533691,
|
137 |
+
"logps/chosen": -246.781982421875,
|
138 |
+
"logps/rejected": -232.3020477294922,
|
139 |
+
"loss": 1.1127,
|
140 |
+
"rewards/accuracies": 0.5562499761581421,
|
141 |
+
"rewards/chosen": 2.182149887084961,
|
142 |
+
"rewards/margins": 6.110042095184326,
|
143 |
+
"rewards/rejected": -3.927891492843628,
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
+
"epoch": 0.46,
|
148 |
+
"grad_norm": 1414.9882610729042,
|
149 |
+
"learning_rate": 3.272542485937368e-07,
|
150 |
+
"logits/chosen": -5.056512355804443,
|
151 |
+
"logits/rejected": -5.19997501373291,
|
152 |
+
"logps/chosen": -236.23886108398438,
|
153 |
+
"logps/rejected": -219.4969940185547,
|
154 |
+
"loss": 1.1651,
|
155 |
+
"rewards/accuracies": 0.59375,
|
156 |
+
"rewards/chosen": 2.3071811199188232,
|
157 |
+
"rewards/margins": 4.593169212341309,
|
158 |
+
"rewards/rejected": -2.2859878540039062,
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
+
"epoch": 0.51,
|
163 |
+
"grad_norm": 1730.7459110414102,
|
164 |
+
"learning_rate": 2.8355831645441387e-07,
|
165 |
+
"logits/chosen": -5.051321506500244,
|
166 |
+
"logits/rejected": -5.197503089904785,
|
167 |
+
"logps/chosen": -245.94680786132812,
|
168 |
+
"logps/rejected": -224.7979278564453,
|
169 |
+
"loss": 1.1049,
|
170 |
+
"rewards/accuracies": 0.643750011920929,
|
171 |
+
"rewards/chosen": 2.0447471141815186,
|
172 |
+
"rewards/margins": 3.989384412765503,
|
173 |
+
"rewards/rejected": -1.9446370601654053,
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
+
"epoch": 0.56,
|
178 |
+
"grad_norm": 1376.721155787266,
|
179 |
+
"learning_rate": 2.3878379241237134e-07,
|
180 |
+
"logits/chosen": -5.05279541015625,
|
181 |
+
"logits/rejected": -5.2380499839782715,
|
182 |
+
"logps/chosen": -231.46408081054688,
|
183 |
+
"logps/rejected": -221.2686309814453,
|
184 |
+
"loss": 1.0653,
|
185 |
+
"rewards/accuracies": 0.637499988079071,
|
186 |
+
"rewards/chosen": 2.9433412551879883,
|
187 |
+
"rewards/margins": 7.433489799499512,
|
188 |
+
"rewards/rejected": -4.490148544311523,
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
+
"epoch": 0.61,
|
193 |
+
"grad_norm": 1298.5481767381427,
|
194 |
+
"learning_rate": 1.9436976651092142e-07,
|
195 |
+
"logits/chosen": -4.989577293395996,
|
196 |
+
"logits/rejected": -5.143449306488037,
|
197 |
+
"logps/chosen": -250.3534698486328,
|
198 |
+
"logps/rejected": -237.04074096679688,
|
199 |
+
"loss": 1.0694,
|
200 |
+
"rewards/accuracies": 0.6343749761581421,
|
201 |
+
"rewards/chosen": 2.3243861198425293,
|
202 |
+
"rewards/margins": 8.470600128173828,
|
203 |
+
"rewards/rejected": -6.146214485168457,
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
+
"epoch": 0.66,
|
208 |
+
"grad_norm": 1456.9702892975145,
|
209 |
+
"learning_rate": 1.517437420865191e-07,
|
210 |
+
"logits/chosen": -5.036610126495361,
|
211 |
+
"logits/rejected": -5.181552886962891,
|
212 |
+
"logps/chosen": -234.2519073486328,
|
213 |
+
"logps/rejected": -226.05050659179688,
|
214 |
+
"loss": 1.1374,
|
215 |
+
"rewards/accuracies": 0.612500011920929,
|
216 |
+
"rewards/chosen": 2.612969160079956,
|
217 |
+
"rewards/margins": 6.129396915435791,
|
218 |
+
"rewards/rejected": -3.516427516937256,
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
+
"epoch": 0.72,
|
223 |
+
"grad_norm": 1414.11944634508,
|
224 |
+
"learning_rate": 1.1227575463697439e-07,
|
225 |
+
"logits/chosen": -5.011117458343506,
|
226 |
+
"logits/rejected": -5.0677995681762695,
|
227 |
+
"logps/chosen": -246.2405242919922,
|
228 |
+
"logps/rejected": -240.97647094726562,
|
229 |
+
"loss": 1.0012,
|
230 |
+
"rewards/accuracies": 0.6625000238418579,
|
231 |
+
"rewards/chosen": 2.1312901973724365,
|
232 |
+
"rewards/margins": 6.49268102645874,
|
233 |
+
"rewards/rejected": -4.361390590667725,
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
+
"epoch": 0.77,
|
238 |
+
"grad_norm": 1391.6252979817953,
|
239 |
+
"learning_rate": 7.723433775328384e-08,
|
240 |
+
"logits/chosen": -5.031737327575684,
|
241 |
+
"logits/rejected": -5.141982078552246,
|
242 |
+
"logps/chosen": -247.31640625,
|
243 |
+
"logps/rejected": -245.01284790039062,
|
244 |
+
"loss": 1.0468,
|
245 |
+
"rewards/accuracies": 0.6625000238418579,
|
246 |
+
"rewards/chosen": 3.413778781890869,
|
247 |
+
"rewards/margins": 8.60617446899414,
|
248 |
+
"rewards/rejected": -5.19239616394043,
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
+
"epoch": 0.82,
|
253 |
+
"grad_norm": 1305.4800329449993,
|
254 |
+
"learning_rate": 4.774575140626316e-08,
|
255 |
+
"logits/chosen": -4.959289073944092,
|
256 |
+
"logits/rejected": -5.040767192840576,
|
257 |
+
"logps/chosen": -253.7027587890625,
|
258 |
+
"logps/rejected": -250.91659545898438,
|
259 |
+
"loss": 0.9992,
|
260 |
+
"rewards/accuracies": 0.6781250238418579,
|
261 |
+
"rewards/chosen": 3.046278476715088,
|
262 |
+
"rewards/margins": 8.344175338745117,
|
263 |
+
"rewards/rejected": -5.297896862030029,
|
264 |
+
"step": 160
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"epoch": 0.87,
|
268 |
+
"grad_norm": 1228.1104796269808,
|
269 |
+
"learning_rate": 2.475778302439524e-08,
|
270 |
+
"logits/chosen": -5.096159934997559,
|
271 |
+
"logits/rejected": -5.178959369659424,
|
272 |
+
"logps/chosen": -251.2628631591797,
|
273 |
+
"logps/rejected": -233.06857299804688,
|
274 |
+
"loss": 1.0057,
|
275 |
+
"rewards/accuracies": 0.5843750238418579,
|
276 |
+
"rewards/chosen": 2.824694871902466,
|
277 |
+
"rewards/margins": 6.200740814208984,
|
278 |
+
"rewards/rejected": -3.3760459423065186,
|
279 |
+
"step": 170
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"epoch": 0.92,
|
283 |
+
"grad_norm": 1348.827014256151,
|
284 |
+
"learning_rate": 9.009284826036689e-09,
|
285 |
+
"logits/chosen": -4.995651721954346,
|
286 |
+
"logits/rejected": -5.102165222167969,
|
287 |
+
"logps/chosen": -237.61990356445312,
|
288 |
+
"logps/rejected": -232.7886962890625,
|
289 |
+
"loss": 0.9321,
|
290 |
+
"rewards/accuracies": 0.659375011920929,
|
291 |
+
"rewards/chosen": 2.423119068145752,
|
292 |
+
"rewards/margins": 4.8792009353637695,
|
293 |
+
"rewards/rejected": -2.4560813903808594,
|
294 |
+
"step": 180
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"epoch": 0.97,
|
298 |
+
"grad_norm": 1117.1672982866971,
|
299 |
+
"learning_rate": 1.0064265011902328e-09,
|
300 |
+
"logits/chosen": -5.071808815002441,
|
301 |
+
"logits/rejected": -5.110179901123047,
|
302 |
+
"logps/chosen": -236.14224243164062,
|
303 |
+
"logps/rejected": -233.5693359375,
|
304 |
+
"loss": 0.9891,
|
305 |
+
"rewards/accuracies": 0.640625,
|
306 |
+
"rewards/chosen": 1.8652112483978271,
|
307 |
+
"rewards/margins": 5.820201873779297,
|
308 |
+
"rewards/rejected": -3.9549899101257324,
|
309 |
+
"step": 190
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 1.0,
|
313 |
+
"step": 195,
|
314 |
"total_flos": 0.0,
|
315 |
+
"train_loss": 0.9975380127246564,
|
316 |
+
"train_runtime": 5482.1546,
|
317 |
+
"train_samples_per_second": 9.12,
|
318 |
+
"train_steps_per_second": 0.036
|
319 |
}
|
320 |
],
|
321 |
"logging_steps": 10,
|
322 |
+
"max_steps": 195,
|
323 |
"num_input_tokens_seen": 0,
|
324 |
"num_train_epochs": 1,
|
325 |
"save_steps": 100,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
"total_flos": 0.0,
|
327 |
"train_batch_size": 8,
|
328 |
"trial_name": null,
|