Training in progress, step 40000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1315,6 +1315,10 @@ You can finetune this model on your own dataset.
|
|
1315 |
| 0.7015 | 39700 | 0.2503 |
|
1316 |
| 0.7024 | 39750 | 0.3593 |
|
1317 |
| 0.7033 | 39800 | 0.2933 |
|
|
|
|
|
|
|
|
|
1318 |
|
1319 |
</details>
|
1320 |
|
|
|
1315 |
| 0.7015 | 39700 | 0.2503 |
|
1316 |
| 0.7024 | 39750 | 0.3593 |
|
1317 |
| 0.7033 | 39800 | 0.2933 |
|
1318 |
+
| 0.7042 | 39850 | 0.3565 |
|
1319 |
+
| 0.7051 | 39900 | 0.2838 |
|
1320 |
+
| 0.7059 | 39950 | 0.2604 |
|
1321 |
+
| 0.7068 | 40000 | 0.2286 |
|
1322 |
|
1323 |
</details>
|
1324 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c7b416494c6b7505e7fa6eedc83168208b62d0c9428d6cb56bafda1f34f24d9
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be35770dca7605f30d91ba6120504ca43247cfeb02e3d77916fdb2cc3dbfaba0
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbf81c04ea51b7be585fb96d50f50e5dc3906dc50f1775c970264b1a4d1eefeb
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48c793708154234024ec2f24e89dee0955c43c3c92b7c5bd216b3138f06c4acd
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eec77879460249ba1a47baa584385b3461e6077b0f1f35d15287420496875963
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -5580,6 +5580,34 @@
|
|
5580 |
"learning_rate": 1.6498792483948873e-05,
|
5581 |
"loss": 0.2933,
|
5582 |
"step": 39800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5583 |
}
|
5584 |
],
|
5585 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.7068261737732148,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 40000,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
5580 |
"learning_rate": 1.6498792483948873e-05,
|
5581 |
"loss": 0.2933,
|
5582 |
"step": 39800
|
5583 |
+
},
|
5584 |
+
{
|
5585 |
+
"epoch": 0.7041755756215653,
|
5586 |
+
"grad_norm": 1.1238136291503906,
|
5587 |
+
"learning_rate": 1.6449706465610338e-05,
|
5588 |
+
"loss": 0.3565,
|
5589 |
+
"step": 39850
|
5590 |
+
},
|
5591 |
+
{
|
5592 |
+
"epoch": 0.7050591083387818,
|
5593 |
+
"grad_norm": 1.5694066286087036,
|
5594 |
+
"learning_rate": 1.64006204472718e-05,
|
5595 |
+
"loss": 0.2838,
|
5596 |
+
"step": 39900
|
5597 |
+
},
|
5598 |
+
{
|
5599 |
+
"epoch": 0.7059426410559984,
|
5600 |
+
"grad_norm": 1.808310866355896,
|
5601 |
+
"learning_rate": 1.6351534428933263e-05,
|
5602 |
+
"loss": 0.2604,
|
5603 |
+
"step": 39950
|
5604 |
+
},
|
5605 |
+
{
|
5606 |
+
"epoch": 0.7068261737732148,
|
5607 |
+
"grad_norm": 1.6668068170547485,
|
5608 |
+
"learning_rate": 1.6302448410594725e-05,
|
5609 |
+
"loss": 0.2286,
|
5610 |
+
"step": 40000
|
5611 |
}
|
5612 |
],
|
5613 |
"logging_steps": 50,
|