Training in progress, step 12600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1273,6 +1273,10 @@ You can finetune this model on your own dataset.
|
|
| 1273 |
| 0.2173 | 12300 | 0.3686 |
|
| 1274 |
| 0.2182 | 12350 | 0.4163 |
|
| 1275 |
| 0.2191 | 12400 | 0.3595 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1276 |
|
| 1277 |
|
| 1278 |
### Framework Versions
|
|
|
|
| 1273 |
| 0.2173 | 12300 | 0.3686 |
|
| 1274 |
| 0.2182 | 12350 | 0.4163 |
|
| 1275 |
| 0.2191 | 12400 | 0.3595 |
|
| 1276 |
+
| 0.2200 | 12450 | 0.3326 |
|
| 1277 |
+
| 0.2209 | 12500 | 0.3775 |
|
| 1278 |
+
| 0.2218 | 12550 | 0.3695 |
|
| 1279 |
+
| 0.2227 | 12600 | 0.3545 |
|
| 1280 |
|
| 1281 |
|
| 1282 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80936dfc8900c8ac8c5575eef651ee2d03a5cd89aa29046749ab69fc98c04269
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ae8d02c618bd157f065f641eb6849f4a49c9cf111a69fc22cd74fafb816bffc
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2810d275875e84011ae759c1d7b23d09c29defe9da62dd6f19a6663a8f613d62
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ce2d7b7dd5e5f39af67a8ead71654a635306c138883914bf7f8972e42520e3f
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9386fe2e7b7d4f410bc4b63dcc7461a70d3aea6cca8295dc5a10ef7582b0f51
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1744,6 +1744,34 @@
|
|
| 1744 |
"learning_rate": 4.3385168168698834e-05,
|
| 1745 |
"loss": 0.3595,
|
| 1746 |
"step": 12400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1747 |
}
|
| 1748 |
],
|
| 1749 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.22265024473856268,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 12600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1744 |
"learning_rate": 4.3385168168698834e-05,
|
| 1745 |
"loss": 0.3595,
|
| 1746 |
"step": 12400
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"epoch": 0.21999964658691312,
|
| 1750 |
+
"grad_norm": 1.5602166652679443,
|
| 1751 |
+
"learning_rate": 4.333608215036029e-05,
|
| 1752 |
+
"loss": 0.3326,
|
| 1753 |
+
"step": 12450
|
| 1754 |
+
},
|
| 1755 |
+
{
|
| 1756 |
+
"epoch": 0.22088317930412962,
|
| 1757 |
+
"grad_norm": 1.7230535745620728,
|
| 1758 |
+
"learning_rate": 4.328699613202176e-05,
|
| 1759 |
+
"loss": 0.3775,
|
| 1760 |
+
"step": 12500
|
| 1761 |
+
},
|
| 1762 |
+
{
|
| 1763 |
+
"epoch": 0.22176671202134615,
|
| 1764 |
+
"grad_norm": 1.8666094541549683,
|
| 1765 |
+
"learning_rate": 4.323791011368322e-05,
|
| 1766 |
+
"loss": 0.3695,
|
| 1767 |
+
"step": 12550
|
| 1768 |
+
},
|
| 1769 |
+
{
|
| 1770 |
+
"epoch": 0.22265024473856268,
|
| 1771 |
+
"grad_norm": 3.1689233779907227,
|
| 1772 |
+
"learning_rate": 4.318882409534468e-05,
|
| 1773 |
+
"loss": 0.3545,
|
| 1774 |
+
"step": 12600
|
| 1775 |
}
|
| 1776 |
],
|
| 1777 |
"logging_steps": 50,
|