Training in progress, step 36400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1241,6 +1241,10 @@ You can finetune this model on your own dataset.
|
|
1241 |
| 0.6379 | 36100 | 0.3065 |
|
1242 |
| 0.6388 | 36150 | 0.3696 |
|
1243 |
| 0.6397 | 36200 | 0.2626 |
|
|
|
|
|
|
|
|
|
1244 |
|
1245 |
|
1246 |
### Framework Versions
|
|
|
1241 |
| 0.6379 | 36100 | 0.3065 |
|
1242 |
| 0.6388 | 36150 | 0.3696 |
|
1243 |
| 0.6397 | 36200 | 0.2626 |
|
1244 |
+
| 0.6406 | 36250 | 0.2681 |
|
1245 |
+
| 0.6414 | 36300 | 0.3015 |
|
1246 |
+
| 0.6423 | 36350 | 0.3587 |
|
1247 |
+
| 0.6432 | 36400 | 0.4138 |
|
1248 |
|
1249 |
|
1250 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6fa930b0dab86162d7efbd96968c6c8ec876ad504bda8baf56c94db0aa7a0fe
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fac4d3dd23450aee08424612d1db499ef9235974098ad78434fec72fecee4d6d
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:314b323b3a511d67b15dea57468e46e599190af351ebf5549ffaf80b2c3a5647
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ccfc3ca931bcded792f304859dd31e568836efb9070c3920cd3b589d1e21087
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c17ed076975a0abe48817667748f3429c8046eb3b0e6862704beb7e23213867
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -5076,6 +5076,34 @@
|
|
5076 |
"learning_rate": 2.0031022363589955e-05,
|
5077 |
"loss": 0.2626,
|
5078 |
"step": 36200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5079 |
}
|
5080 |
],
|
5081 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.6432118181336255,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 36400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
5076 |
"learning_rate": 2.0031022363589955e-05,
|
5077 |
"loss": 0.2626,
|
5078 |
"step": 36200
|
5079 |
+
},
|
5080 |
+
{
|
5081 |
+
"epoch": 0.6405612199819759,
|
5082 |
+
"grad_norm": 1.4499566555023193,
|
5083 |
+
"learning_rate": 1.998193634525142e-05,
|
5084 |
+
"loss": 0.2681,
|
5085 |
+
"step": 36250
|
5086 |
+
},
|
5087 |
+
{
|
5088 |
+
"epoch": 0.6414447526991924,
|
5089 |
+
"grad_norm": 1.7677289247512817,
|
5090 |
+
"learning_rate": 1.9932850326912884e-05,
|
5091 |
+
"loss": 0.3015,
|
5092 |
+
"step": 36300
|
5093 |
+
},
|
5094 |
+
{
|
5095 |
+
"epoch": 0.642328285416409,
|
5096 |
+
"grad_norm": 2.6856095790863037,
|
5097 |
+
"learning_rate": 1.9883764308574346e-05,
|
5098 |
+
"loss": 0.3587,
|
5099 |
+
"step": 36350
|
5100 |
+
},
|
5101 |
+
{
|
5102 |
+
"epoch": 0.6432118181336255,
|
5103 |
+
"grad_norm": 1.9422292709350586,
|
5104 |
+
"learning_rate": 1.983467829023581e-05,
|
5105 |
+
"loss": 0.4138,
|
5106 |
+
"step": 36400
|
5107 |
}
|
5108 |
],
|
5109 |
"logging_steps": 50,
|