Training in progress, step 120, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3380768360
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78dac7535dac9a926aa3d6f1cede2677776554a31ae3b03733c9be34bb45feb8
|
3 |
size 3380768360
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ddc11eef2343db6f5ad7d3c768a98ae313ab5bdd1d33c62a12390009d4c92aa
|
3 |
+
size 1757899449
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97490d166ca8bc27bfa10807632f9ecb473b145cce74c93d287cde23f8af51fb
|
3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69b592d4efa2ff6d0584dfc8cf30049181a8d5c8977939386b39d5882c0a494e
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch":
|
6 |
"eval_steps": 30,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -165,6 +165,56 @@
|
|
165 |
"eval_samples_per_second": 0.292,
|
166 |
"eval_steps_per_second": 0.073,
|
167 |
"step": 90
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
}
|
169 |
],
|
170 |
"logging_steps": 5,
|
@@ -184,7 +234,7 @@
|
|
184 |
"attributes": {}
|
185 |
}
|
186 |
},
|
187 |
-
"total_flos":
|
188 |
"train_batch_size": 8,
|
189 |
"trial_name": null,
|
190 |
"trial_params": null
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 1.2508710801393728,
|
6 |
"eval_steps": 30,
|
7 |
+
"global_step": 120,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
165 |
"eval_samples_per_second": 0.292,
|
166 |
"eval_steps_per_second": 0.073,
|
167 |
"step": 90
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"epoch": 0.9930313588850174,
|
171 |
+
"grad_norm": 0.10834779590368271,
|
172 |
+
"learning_rate": 5.4217553302152237e-05,
|
173 |
+
"loss": 0.2944,
|
174 |
+
"step": 95
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"epoch": 1.0418118466898956,
|
178 |
+
"grad_norm": 0.0633026584982872,
|
179 |
+
"learning_rate": 5e-05,
|
180 |
+
"loss": 0.2095,
|
181 |
+
"step": 100
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"epoch": 1.0940766550522647,
|
185 |
+
"grad_norm": 0.07635607570409775,
|
186 |
+
"learning_rate": 4.578244669784777e-05,
|
187 |
+
"loss": 0.2058,
|
188 |
+
"step": 105
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"epoch": 1.146341463414634,
|
192 |
+
"grad_norm": 0.0802493542432785,
|
193 |
+
"learning_rate": 4.15949552878926e-05,
|
194 |
+
"loss": 0.2232,
|
195 |
+
"step": 110
|
196 |
+
},
|
197 |
+
{
|
198 |
+
"epoch": 1.1986062717770034,
|
199 |
+
"grad_norm": 0.12653642892837524,
|
200 |
+
"learning_rate": 3.746737338706397e-05,
|
201 |
+
"loss": 0.2532,
|
202 |
+
"step": 115
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"epoch": 1.2508710801393728,
|
206 |
+
"grad_norm": 0.07813160121440887,
|
207 |
+
"learning_rate": 3.3429121589068215e-05,
|
208 |
+
"loss": 0.2893,
|
209 |
+
"step": 120
|
210 |
+
},
|
211 |
+
{
|
212 |
+
"epoch": 1.2508710801393728,
|
213 |
+
"eval_loss": 0.239236518740654,
|
214 |
+
"eval_runtime": 1751.4721,
|
215 |
+
"eval_samples_per_second": 0.291,
|
216 |
+
"eval_steps_per_second": 0.073,
|
217 |
+
"step": 120
|
218 |
}
|
219 |
],
|
220 |
"logging_steps": 5,
|
|
|
234 |
"attributes": {}
|
235 |
}
|
236 |
},
|
237 |
+
"total_flos": 7.763514566754386e+17,
|
238 |
"train_batch_size": 8,
|
239 |
"trial_name": null,
|
240 |
"trial_params": null
|