Training in progress, step 39000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1295,6 +1295,10 @@ You can finetune this model on your own dataset.
|
|
1295 |
| 0.6839 | 38700 | 0.3456 |
|
1296 |
| 0.6847 | 38750 | 0.2554 |
|
1297 |
| 0.6856 | 38800 | 0.2289 |
|
|
|
|
|
|
|
|
|
1298 |
|
1299 |
</details>
|
1300 |
|
|
|
1295 |
| 0.6839 | 38700 | 0.3456 |
|
1296 |
| 0.6847 | 38750 | 0.2554 |
|
1297 |
| 0.6856 | 38800 | 0.2289 |
|
1298 |
+
| 0.6865 | 38850 | 0.2409 |
|
1299 |
+
| 0.6874 | 38900 | 0.3395 |
|
1300 |
+
| 0.6883 | 38950 | 0.3296 |
|
1301 |
+
| 0.6892 | 39000 | 0.349 |
|
1302 |
|
1303 |
</details>
|
1304 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6ce327cffaa12e6c6a5ddcb4a6d40429edafacde9dfb54d675b913bfba08975
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20234fcfe1aeda353bad872052502098141b92c7a49294548fc32efb7c40e203
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bd0776e6b0bd55ed184f92817d37c75213ff68505295e3fc1bee2b951ea4bee
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e33c3adb678c12b738b4f6b854bebd80c46584381912137221d663740b1248a
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54902ad30aa2955b28423fee094e905e66f30b7e8919420508a8b07f3d3c75a6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -5440,6 +5440,34 @@
|
|
5440 |
"learning_rate": 1.7480512850719603e-05,
|
5441 |
"loss": 0.2289,
|
5442 |
"step": 38800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5443 |
}
|
5444 |
],
|
5445 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.6891555194288844,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 39000,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
5440 |
"learning_rate": 1.7480512850719603e-05,
|
5441 |
"loss": 0.2289,
|
5442 |
"step": 38800
|
5443 |
+
},
|
5444 |
+
{
|
5445 |
+
"epoch": 0.686504921277235,
|
5446 |
+
"grad_norm": 1.4531927108764648,
|
5447 |
+
"learning_rate": 1.7431426832381064e-05,
|
5448 |
+
"loss": 0.2409,
|
5449 |
+
"step": 38850
|
5450 |
+
},
|
5451 |
+
{
|
5452 |
+
"epoch": 0.6873884539944514,
|
5453 |
+
"grad_norm": 3.168332576751709,
|
5454 |
+
"learning_rate": 1.738234081404253e-05,
|
5455 |
+
"loss": 0.3395,
|
5456 |
+
"step": 38900
|
5457 |
+
},
|
5458 |
+
{
|
5459 |
+
"epoch": 0.6882719867116679,
|
5460 |
+
"grad_norm": 1.5215202569961548,
|
5461 |
+
"learning_rate": 1.7333254795703994e-05,
|
5462 |
+
"loss": 0.3296,
|
5463 |
+
"step": 38950
|
5464 |
+
},
|
5465 |
+
{
|
5466 |
+
"epoch": 0.6891555194288844,
|
5467 |
+
"grad_norm": 1.9889358282089233,
|
5468 |
+
"learning_rate": 1.7284168777365455e-05,
|
5469 |
+
"loss": 0.349,
|
5470 |
+
"step": 39000
|
5471 |
}
|
5472 |
],
|
5473 |
"logging_steps": 50,
|