Training in progress, step 47400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1237,6 +1237,10 @@ You can finetune this model on your own dataset.
|
|
1237 |
| 0.8323 | 47100 | 0.3721 |
|
1238 |
| 0.8332 | 47150 | 0.3483 |
|
1239 |
| 0.8341 | 47200 | 0.3002 |
|
|
|
|
|
|
|
|
|
1240 |
|
1241 |
|
1242 |
### Framework Versions
|
|
|
1237 |
| 0.8323 | 47100 | 0.3721 |
|
1238 |
| 0.8332 | 47150 | 0.3483 |
|
1239 |
| 0.8341 | 47200 | 0.3002 |
|
1240 |
+
| 0.8349 | 47250 | 0.2333 |
|
1241 |
+
| 0.8358 | 47300 | 0.3043 |
|
1242 |
+
| 0.8367 | 47350 | 0.2992 |
|
1243 |
+
| 0.8376 | 47400 | 0.3367 |
|
1244 |
|
1245 |
|
1246 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fa899c39466858c1bb72384b17719bcb1b30345e9ce76f3b6b83ef15a3594ea
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2193950b0b0024f572dc11e3f6e5765584be0f18471c5c13d65ef36051f2d86c
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8eb8dcdd8e8b75fd491010fd65bc9f4650cdec68128bc3f93969648db6824c7d
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f17b09e37e3cf1eb4e07e5a39443f3088190a89ec52eee73b652e920bd5e3c6d
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0328ae42c77686bf92831f9769ee36bd90306f1f061a996a4634ff09f38cc121
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -6616,6 +6616,34 @@
|
|
6616 |
"learning_rate": 9.237988651312562e-06,
|
6617 |
"loss": 0.3002,
|
6618 |
"step": 47200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6619 |
}
|
6620 |
],
|
6621 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.8375890159212596,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 47400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
6616 |
"learning_rate": 9.237988651312562e-06,
|
6617 |
"loss": 0.3002,
|
6618 |
"step": 47200
|
6619 |
+
},
|
6620 |
+
{
|
6621 |
+
"epoch": 0.83493841776961,
|
6622 |
+
"grad_norm": 1.0670841932296753,
|
6623 |
+
"learning_rate": 9.188902632974023e-06,
|
6624 |
+
"loss": 0.2333,
|
6625 |
+
"step": 47250
|
6626 |
+
},
|
6627 |
+
{
|
6628 |
+
"epoch": 0.8358219504868265,
|
6629 |
+
"grad_norm": 1.9052667617797852,
|
6630 |
+
"learning_rate": 9.139816614635488e-06,
|
6631 |
+
"loss": 0.3043,
|
6632 |
+
"step": 47300
|
6633 |
+
},
|
6634 |
+
{
|
6635 |
+
"epoch": 0.836705483204043,
|
6636 |
+
"grad_norm": 2.9311811923980713,
|
6637 |
+
"learning_rate": 9.09073059629695e-06,
|
6638 |
+
"loss": 0.2992,
|
6639 |
+
"step": 47350
|
6640 |
+
},
|
6641 |
+
{
|
6642 |
+
"epoch": 0.8375890159212596,
|
6643 |
+
"grad_norm": 1.2520331144332886,
|
6644 |
+
"learning_rate": 9.041644577958415e-06,
|
6645 |
+
"loss": 0.3367,
|
6646 |
+
"step": 47400
|
6647 |
}
|
6648 |
],
|
6649 |
"logging_steps": 50,
|