Training in progress, step 8800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1197,6 +1197,10 @@ You can finetune this model on your own dataset.
|
|
1197 |
| 0.1502 | 8500 | 0.4203 |
|
1198 |
| 0.1511 | 8550 | 0.414 |
|
1199 |
| 0.1520 | 8600 | 0.3436 |
|
|
|
|
|
|
|
|
|
1200 |
|
1201 |
|
1202 |
### Framework Versions
|
|
|
1197 |
| 0.1502 | 8500 | 0.4203 |
|
1198 |
| 0.1511 | 8550 | 0.414 |
|
1199 |
| 0.1520 | 8600 | 0.3436 |
|
1200 |
+
| 0.1529 | 8650 | 0.436 |
|
1201 |
+
| 0.1537 | 8700 | 0.3761 |
|
1202 |
+
| 0.1546 | 8750 | 0.5579 |
|
1203 |
+
| 0.1555 | 8800 | 0.441 |
|
1204 |
|
1205 |
|
1206 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48362ba7fcb9b7f2c1aa7030a50c6258f1f4d6cc4f1665e7ca632872744db5c0
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75a1f68b3f77c2bf8fd6e4f0bdee7b8ce97a773c56cdc7093cbfde49c00998f8
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14cd2e3712404ebb968f80623888d77e60a6a55be515062adfbd0e098660e63a
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d6c0ca588079b6a2e8be7c5b7bc6af97572af78d73d1dce8d6f30fcc361b6a3
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89db5df6d87e4cc0368652d8350a2c32d46bae130899047ae58e000eec6fa2a0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -1212,6 +1212,34 @@
|
|
1212 |
"learning_rate": 4.711472384206083e-05,
|
1213 |
"loss": 0.3436,
|
1214 |
"step": 8600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1215 |
}
|
1216 |
],
|
1217 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.15550175823010726,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 8800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
1212 |
"learning_rate": 4.711472384206083e-05,
|
1213 |
"loss": 0.3436,
|
1214 |
"step": 8600
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"epoch": 0.1528511600784577,
|
1218 |
+
"grad_norm": 2.2018544673919678,
|
1219 |
+
"learning_rate": 4.706563782372229e-05,
|
1220 |
+
"loss": 0.436,
|
1221 |
+
"step": 8650
|
1222 |
+
},
|
1223 |
+
{
|
1224 |
+
"epoch": 0.15373469279567423,
|
1225 |
+
"grad_norm": 1.5418767929077148,
|
1226 |
+
"learning_rate": 4.701655180538376e-05,
|
1227 |
+
"loss": 0.3761,
|
1228 |
+
"step": 8700
|
1229 |
+
},
|
1230 |
+
{
|
1231 |
+
"epoch": 0.15461822551289076,
|
1232 |
+
"grad_norm": 4.974616050720215,
|
1233 |
+
"learning_rate": 4.6967465787045225e-05,
|
1234 |
+
"loss": 0.5579,
|
1235 |
+
"step": 8750
|
1236 |
+
},
|
1237 |
+
{
|
1238 |
+
"epoch": 0.15550175823010726,
|
1239 |
+
"grad_norm": 1.8653486967086792,
|
1240 |
+
"learning_rate": 4.691837976870668e-05,
|
1241 |
+
"loss": 0.441,
|
1242 |
+
"step": 8800
|
1243 |
}
|
1244 |
],
|
1245 |
"logging_steps": 50,
|