Training in progress, step 8600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1193,6 +1193,10 @@ You can finetune this model on your own dataset.
|
|
1193 |
| 0.1467 | 8300 | 0.3967 |
|
1194 |
| 0.1475 | 8350 | 0.4911 |
|
1195 |
| 0.1484 | 8400 | 0.4076 |
|
|
|
|
|
|
|
|
|
1196 |
|
1197 |
|
1198 |
### Framework Versions
|
|
|
1193 |
| 0.1467 | 8300 | 0.3967 |
|
1194 |
| 0.1475 | 8350 | 0.4911 |
|
1195 |
| 0.1484 | 8400 | 0.4076 |
|
1196 |
+
| 0.1493 | 8450 | 0.398 |
|
1197 |
+
| 0.1502 | 8500 | 0.4203 |
|
1198 |
+
| 0.1511 | 8550 | 0.414 |
|
1199 |
+
| 0.1520 | 8600 | 0.3436 |
|
1200 |
|
1201 |
|
1202 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03eb7a5af20bd02ca3c82b2b9c70071b68c323ac87a7799ea87cd5139a94eb91
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40262b6becfc7b5455f773e7a9a3bb594892eb51a5604b32e91c90609a89bdfb
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66f5a47fc1fb0afabfab2902b2aa5af7ab84c4d2097ab2d93b83ccc74c03acb1
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b161abd57fc77c436995db62a6c89b28c6220f59213ab301dd9a979ea4c14232
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6c145ff6cd9415e52757f69b1d04fadde604f73651764ac12eda7fa02fc8ae0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -1184,6 +1184,34 @@
|
|
1184 |
"learning_rate": 4.731106791541497e-05,
|
1185 |
"loss": 0.4076,
|
1186 |
"step": 8400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1187 |
}
|
1188 |
],
|
1189 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.1519676273612412,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 8600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
1184 |
"learning_rate": 4.731106791541497e-05,
|
1185 |
"loss": 0.4076,
|
1186 |
"step": 8400
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"epoch": 0.14931702920959164,
|
1190 |
+
"grad_norm": 2.1234054565429688,
|
1191 |
+
"learning_rate": 4.7261981897076444e-05,
|
1192 |
+
"loss": 0.398,
|
1193 |
+
"step": 8450
|
1194 |
+
},
|
1195 |
+
{
|
1196 |
+
"epoch": 0.15020056192680814,
|
1197 |
+
"grad_norm": 2.1532113552093506,
|
1198 |
+
"learning_rate": 4.72128958787379e-05,
|
1199 |
+
"loss": 0.4203,
|
1200 |
+
"step": 8500
|
1201 |
+
},
|
1202 |
+
{
|
1203 |
+
"epoch": 0.15108409464402467,
|
1204 |
+
"grad_norm": 1.8909550905227661,
|
1205 |
+
"learning_rate": 4.7163809860399366e-05,
|
1206 |
+
"loss": 0.414,
|
1207 |
+
"step": 8550
|
1208 |
+
},
|
1209 |
+
{
|
1210 |
+
"epoch": 0.1519676273612412,
|
1211 |
+
"grad_norm": 1.9415462017059326,
|
1212 |
+
"learning_rate": 4.711472384206083e-05,
|
1213 |
+
"loss": 0.3436,
|
1214 |
+
"step": 8600
|
1215 |
}
|
1216 |
],
|
1217 |
"logging_steps": 50,
|