Training in progress, step 32400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1307,6 +1307,10 @@ You can finetune this model on your own dataset.
|
|
1307 |
| 0.5672 | 32100 | 0.2491 |
|
1308 |
| 0.5681 | 32150 | 0.2663 |
|
1309 |
| 0.5690 | 32200 | 0.3433 |
|
|
|
|
|
|
|
|
|
1310 |
|
1311 |
</details>
|
1312 |
|
|
|
1307 |
| 0.5672 | 32100 | 0.2491 |
|
1308 |
| 0.5681 | 32150 | 0.2663 |
|
1309 |
| 0.5690 | 32200 | 0.3433 |
|
1310 |
+
| 0.5699 | 32250 | 0.3375 |
|
1311 |
+
| 0.5708 | 32300 | 0.2891 |
|
1312 |
+
| 0.5716 | 32350 | 0.296 |
|
1313 |
+
| 0.5725 | 32400 | 0.2478 |
|
1314 |
|
1315 |
</details>
|
1316 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae7029cc4a936db82aecdfc345ae4f6be581e777c25d3e011d86f49807e6bd27
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffb5e77212890ce43c0fc7a397665bcd4da432ff88fa93a14b948bcf16b02cdd
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa682a307fc96e53a68118cb34dd8ebd33bebc1ee7d8a07cfe20e4ef67c0641e
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7d183fbaa9612fc768da7cf7cefd13466b61ea27398ff0fc8e2f5977752f4ef
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71836e8eeb9479ebd5deabd14069458b8ae082f80c2be8da5292f909ebf1971a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -4516,6 +4516,34 @@
|
|
4516 |
"learning_rate": 2.395594038993933e-05,
|
4517 |
"loss": 0.3433,
|
4518 |
"step": 32200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4519 |
}
|
4520 |
],
|
4521 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.572529200756304,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 32400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
4516 |
"learning_rate": 2.395594038993933e-05,
|
4517 |
"loss": 0.3433,
|
4518 |
"step": 32200
|
4519 |
+
},
|
4520 |
+
{
|
4521 |
+
"epoch": 0.5698786026046545,
|
4522 |
+
"grad_norm": 1.4697036743164062,
|
4523 |
+
"learning_rate": 2.3906854371600795e-05,
|
4524 |
+
"loss": 0.3375,
|
4525 |
+
"step": 32250
|
4526 |
+
},
|
4527 |
+
{
|
4528 |
+
"epoch": 0.5707621353218709,
|
4529 |
+
"grad_norm": 2.39277720451355,
|
4530 |
+
"learning_rate": 2.385776835326226e-05,
|
4531 |
+
"loss": 0.2891,
|
4532 |
+
"step": 32300
|
4533 |
+
},
|
4534 |
+
{
|
4535 |
+
"epoch": 0.5716456680390875,
|
4536 |
+
"grad_norm": 1.5755674839019775,
|
4537 |
+
"learning_rate": 2.380868233492372e-05,
|
4538 |
+
"loss": 0.296,
|
4539 |
+
"step": 32350
|
4540 |
+
},
|
4541 |
+
{
|
4542 |
+
"epoch": 0.572529200756304,
|
4543 |
+
"grad_norm": 1.5802369117736816,
|
4544 |
+
"learning_rate": 2.3759596316585182e-05,
|
4545 |
+
"loss": 0.2478,
|
4546 |
+
"step": 32400
|
4547 |
}
|
4548 |
],
|
4549 |
"logging_steps": 50,
|