Training in progress, step 70, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167832240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9149805fd0ff6c9295a21f0ceed866ec5e19b987dc9b024c5713996cbccee03
|
3 |
size 167832240
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 85723284
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e2f761486d2d2277401dace4f44681830fd254ea3d41966c9a17c77fec745aa
|
3 |
size 85723284
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4abd8c451028780b83d24be580224b85633dd02b3c6970e7c2e5f6a76f5c96a5
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34cc44421081656693b70701aad2c568b7fd366e841bbfc436ac5b6eb6c2b321
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 7,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -234,6 +234,28 @@
|
|
234 |
"eval_samples_per_second": 13.589,
|
235 |
"eval_steps_per_second": 1.989,
|
236 |
"step": 63
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
}
|
238 |
],
|
239 |
"logging_steps": 3,
|
@@ -253,7 +275,7 @@
|
|
253 |
"attributes": {}
|
254 |
}
|
255 |
},
|
256 |
-
"total_flos":
|
257 |
"train_batch_size": 8,
|
258 |
"trial_name": null,
|
259 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.88659793814433,
|
5 |
"eval_steps": 7,
|
6 |
+
"global_step": 70,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
234 |
"eval_samples_per_second": 13.589,
|
235 |
"eval_steps_per_second": 1.989,
|
236 |
"step": 63
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"epoch": 2.7216494845360826,
|
240 |
+
"grad_norm": 1.3034956455230713,
|
241 |
+
"learning_rate": 3.0153689607045845e-06,
|
242 |
+
"loss": 0.8017,
|
243 |
+
"step": 66
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"epoch": 2.845360824742268,
|
247 |
+
"grad_norm": 1.2311569452285767,
|
248 |
+
"learning_rate": 9.913756075728087e-07,
|
249 |
+
"loss": 0.8266,
|
250 |
+
"step": 69
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"epoch": 2.88659793814433,
|
254 |
+
"eval_loss": 1.205170750617981,
|
255 |
+
"eval_runtime": 3.0134,
|
256 |
+
"eval_samples_per_second": 13.606,
|
257 |
+
"eval_steps_per_second": 1.991,
|
258 |
+
"step": 70
|
259 |
}
|
260 |
],
|
261 |
"logging_steps": 3,
|
|
|
275 |
"attributes": {}
|
276 |
}
|
277 |
},
|
278 |
+
"total_flos": 1.0340054014623744e+17,
|
279 |
"train_batch_size": 8,
|
280 |
"trial_name": null,
|
281 |
"trial_params": null
|