Training in progress, step 24000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1503,6 +1503,10 @@ You can finetune this model on your own dataset.
|
|
1503 |
| 0.4188 | 23700 | 0.2905 |
|
1504 |
| 0.4197 | 23750 | 0.3234 |
|
1505 |
| 0.4206 | 23800 | 0.3063 |
|
|
|
|
|
|
|
|
|
1506 |
|
1507 |
</details>
|
1508 |
|
|
|
1503 |
| 0.4188 | 23700 | 0.2905 |
|
1504 |
| 0.4197 | 23750 | 0.3234 |
|
1505 |
| 0.4206 | 23800 | 0.3063 |
|
1506 |
+
| 0.4214 | 23850 | 0.3386 |
|
1507 |
+
| 0.4223 | 23900 | 0.3431 |
|
1508 |
+
| 0.4232 | 23950 | 0.2902 |
|
1509 |
+
| 0.4241 | 24000 | 0.3136 |
|
1510 |
|
1511 |
</details>
|
1512 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f90e824e1a9ffb638cde5da629726eeb7bc12cd6362fbb12d7d04ce5372c8f5
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9a99a7427fcc196b3d93d1ed84e8a7ea1d28d3ea047d1257af87399a72693de
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66083f07808429ecf1c592a46d5e2fa5fd55c02b1303e07c74cebcc8fc44243c
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:367e0d39f4bf5ffc122ff5c01987f9ae3492dd527c06b1748807449df9f2581e
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33058f7a86733565c00edc7dc90e657260ca9633bceae0708704d9688e1a076c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -3340,6 +3340,34 @@
|
|
3340 |
"learning_rate": 3.2199446309713146e-05,
|
3341 |
"loss": 0.3063,
|
3342 |
"step": 23800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3343 |
}
|
3344 |
],
|
3345 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.4240957042639289,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 24000,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
3340 |
"learning_rate": 3.2199446309713146e-05,
|
3341 |
"loss": 0.3063,
|
3342 |
"step": 23800
|
3343 |
+
},
|
3344 |
+
{
|
3345 |
+
"epoch": 0.4214451061122793,
|
3346 |
+
"grad_norm": 1.289444923400879,
|
3347 |
+
"learning_rate": 3.2150360291374604e-05,
|
3348 |
+
"loss": 0.3386,
|
3349 |
+
"step": 23850
|
3350 |
+
},
|
3351 |
+
{
|
3352 |
+
"epoch": 0.42232863882949584,
|
3353 |
+
"grad_norm": 2.95922589302063,
|
3354 |
+
"learning_rate": 3.210127427303607e-05,
|
3355 |
+
"loss": 0.3431,
|
3356 |
+
"step": 23900
|
3357 |
+
},
|
3358 |
+
{
|
3359 |
+
"epoch": 0.42321217154671237,
|
3360 |
+
"grad_norm": 1.6753530502319336,
|
3361 |
+
"learning_rate": 3.2052188254697534e-05,
|
3362 |
+
"loss": 0.2902,
|
3363 |
+
"step": 23950
|
3364 |
+
},
|
3365 |
+
{
|
3366 |
+
"epoch": 0.4240957042639289,
|
3367 |
+
"grad_norm": 1.6901003122329712,
|
3368 |
+
"learning_rate": 3.2003102236359e-05,
|
3369 |
+
"loss": 0.3136,
|
3370 |
+
"step": 24000
|
3371 |
}
|
3372 |
],
|
3373 |
"logging_steps": 50,
|