Training in progress, step 39800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1311,6 +1311,10 @@ You can finetune this model on your own dataset.
|
|
1311 |
| 0.6980 | 39500 | 0.2728 |
|
1312 |
| 0.6989 | 39550 | 0.2482 |
|
1313 |
| 0.6998 | 39600 | 0.3475 |
|
|
|
|
|
|
|
|
|
1314 |
|
1315 |
</details>
|
1316 |
|
|
|
1311 |
| 0.6980 | 39500 | 0.2728 |
|
1312 |
| 0.6989 | 39550 | 0.2482 |
|
1313 |
| 0.6998 | 39600 | 0.3475 |
|
1314 |
+
| 0.7006 | 39650 | 0.3447 |
|
1315 |
+
| 0.7015 | 39700 | 0.2503 |
|
1316 |
+
| 0.7024 | 39750 | 0.3593 |
|
1317 |
+
| 0.7033 | 39800 | 0.2933 |
|
1318 |
|
1319 |
</details>
|
1320 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7e3850ec81152c770c1287a58b16a081486e29359e4ddd6242aec24390a1095
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe6982d0068dc488d659ee3ea863838224b7f84d13622881dc74bd17ff8174f7
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf03aa505c305170adf85ee73ee56bc32702b85d7c76a3b04c609b9202b5754a
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71d3db821ebc15469e7d3f1fde03936d361471a5f2cb1a21d3669fed48601709
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2795a9eb0d0e9717e4fddeb0ae82fb57c15cae7d137da34c1144f8d3ca3cd594
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -5552,6 +5552,34 @@
|
|
5552 |
"learning_rate": 1.6695136557303018e-05,
|
5553 |
"loss": 0.3475,
|
5554 |
"step": 39600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5555 |
}
|
5556 |
],
|
5557 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.7032920429043488,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 39800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
5552 |
"learning_rate": 1.6695136557303018e-05,
|
5553 |
"loss": 0.3475,
|
5554 |
"step": 39600
|
5555 |
+
},
|
5556 |
+
{
|
5557 |
+
"epoch": 0.7006414447526992,
|
5558 |
+
"grad_norm": 0.6908143758773804,
|
5559 |
+
"learning_rate": 1.6646050538964482e-05,
|
5560 |
+
"loss": 0.3447,
|
5561 |
+
"step": 39650
|
5562 |
+
},
|
5563 |
+
{
|
5564 |
+
"epoch": 0.7015249774699157,
|
5565 |
+
"grad_norm": 1.5447782278060913,
|
5566 |
+
"learning_rate": 1.6596964520625947e-05,
|
5567 |
+
"loss": 0.2503,
|
5568 |
+
"step": 39700
|
5569 |
+
},
|
5570 |
+
{
|
5571 |
+
"epoch": 0.7024085101871322,
|
5572 |
+
"grad_norm": 1.515202283859253,
|
5573 |
+
"learning_rate": 1.6547878502287408e-05,
|
5574 |
+
"loss": 0.3593,
|
5575 |
+
"step": 39750
|
5576 |
+
},
|
5577 |
+
{
|
5578 |
+
"epoch": 0.7032920429043488,
|
5579 |
+
"grad_norm": 4.640558242797852,
|
5580 |
+
"learning_rate": 1.6498792483948873e-05,
|
5581 |
+
"loss": 0.2933,
|
5582 |
+
"step": 39800
|
5583 |
}
|
5584 |
],
|
5585 |
"logging_steps": 50,
|