Training in progress, step 26400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1551,6 +1551,10 @@ You can finetune this model on your own dataset.
|
|
1551 |
| 0.4612 | 26100 | 0.2856 |
|
1552 |
| 0.4621 | 26150 | 0.3668 |
|
1553 |
| 0.4630 | 26200 | 0.4672 |
|
|
|
|
|
|
|
|
|
1554 |
|
1555 |
</details>
|
1556 |
|
|
|
1551 |
| 0.4612 | 26100 | 0.2856 |
|
1552 |
| 0.4621 | 26150 | 0.3668 |
|
1553 |
| 0.4630 | 26200 | 0.4672 |
|
1554 |
+
| 0.4639 | 26250 | 0.373 |
|
1555 |
+
| 0.4647 | 26300 | 0.3232 |
|
1556 |
+
| 0.4656 | 26350 | 0.3461 |
|
1557 |
+
| 0.4665 | 26400 | 0.2937 |
|
1558 |
|
1559 |
</details>
|
1560 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95db985db084a74db34108a18955cb2f94475e94d71ba1394dc6711c0773d4b7
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99edc411ece53670a76daae12b850cb27d05b6b28f0e1abb1039884ae4ec4d91
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3c74149b9485dd85a90bddde2fb89338979661fdbddb3f61abad71930d1b347
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5aa35335d6835971914c8cd5a825891507e75318afb8a7d46177ecba5e7044a7
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8741bd796701c3df080c6afd4a635df632277d4edf7423e34ebd4e6d1a562aa
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -3676,6 +3676,34 @@
|
|
3676 |
"learning_rate": 2.9844299149830163e-05,
|
3677 |
"loss": 0.4672,
|
3678 |
"step": 26200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3679 |
}
|
3680 |
],
|
3681 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.46650527469032177,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 26400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
3676 |
"learning_rate": 2.9844299149830163e-05,
|
3677 |
"loss": 0.4672,
|
3678 |
"step": 26200
|
3679 |
+
},
|
3680 |
+
{
|
3681 |
+
"epoch": 0.46385467653867224,
|
3682 |
+
"grad_norm": 3.4169373512268066,
|
3683 |
+
"learning_rate": 2.9795213131491627e-05,
|
3684 |
+
"loss": 0.373,
|
3685 |
+
"step": 26250
|
3686 |
+
},
|
3687 |
+
{
|
3688 |
+
"epoch": 0.46473820925588877,
|
3689 |
+
"grad_norm": 1.5430257320404053,
|
3690 |
+
"learning_rate": 2.974612711315309e-05,
|
3691 |
+
"loss": 0.3232,
|
3692 |
+
"step": 26300
|
3693 |
+
},
|
3694 |
+
{
|
3695 |
+
"epoch": 0.46562174197310524,
|
3696 |
+
"grad_norm": 1.674177646636963,
|
3697 |
+
"learning_rate": 2.9697041094814553e-05,
|
3698 |
+
"loss": 0.3461,
|
3699 |
+
"step": 26350
|
3700 |
+
},
|
3701 |
+
{
|
3702 |
+
"epoch": 0.46650527469032177,
|
3703 |
+
"grad_norm": 1.7116457223892212,
|
3704 |
+
"learning_rate": 2.9647955076476018e-05,
|
3705 |
+
"loss": 0.2937,
|
3706 |
+
"step": 26400
|
3707 |
}
|
3708 |
],
|
3709 |
"logging_steps": 50,
|