Training in progress, step 9400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1209,6 +1209,10 @@ You can finetune this model on your own dataset.
|
|
1209 |
| 0.1608 | 9100 | 0.3634 |
|
1210 |
| 0.1617 | 9150 | 0.4252 |
|
1211 |
| 0.1626 | 9200 | 0.3899 |
|
|
|
|
|
|
|
|
|
1212 |
|
1213 |
|
1214 |
### Framework Versions
|
|
|
1209 |
| 0.1608 | 9100 | 0.3634 |
|
1210 |
| 0.1617 | 9150 | 0.4252 |
|
1211 |
| 0.1626 | 9200 | 0.3899 |
|
1212 |
+
| 0.1635 | 9250 | 0.4335 |
|
1213 |
+
| 0.1643 | 9300 | 0.4204 |
|
1214 |
+
| 0.1652 | 9350 | 0.5576 |
|
1215 |
+
| 0.1661 | 9400 | 0.4712 |
|
1216 |
|
1217 |
|
1218 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e974a383273411df15409a82c6a1b9f0831a59d058b967a3d435ccbc1f48583
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b870ccf7b86ba6d235904225aaace7fb586b191e7940b6dc707df695b5dd9282
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7adf907e945b62166980022a68a400535d3ff6272023bde8d2c138ada9021e2
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12e1fb2bcfe11259f3983e51e8c93f656c6150bd91d84aeb3343965e94c4460e
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d745bf9d75c40b1bcb1a39a8a08fa030d79ce77aa08ff337210102fd029a211
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -1296,6 +1296,34 @@
|
|
1296 |
"learning_rate": 4.6525691621998393e-05,
|
1297 |
"loss": 0.3899,
|
1298 |
"step": 9200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1299 |
}
|
1300 |
],
|
1301 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.1661041508367055,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 9400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
1296 |
"learning_rate": 4.6525691621998393e-05,
|
1297 |
"loss": 0.3899,
|
1298 |
"step": 9200
|
1299 |
+
},
|
1300 |
+
{
|
1301 |
+
"epoch": 0.16345355268505593,
|
1302 |
+
"grad_norm": 1.8728185892105103,
|
1303 |
+
"learning_rate": 4.647660560365986e-05,
|
1304 |
+
"loss": 0.4335,
|
1305 |
+
"step": 9250
|
1306 |
+
},
|
1307 |
+
{
|
1308 |
+
"epoch": 0.16433708540227246,
|
1309 |
+
"grad_norm": 1.977250576019287,
|
1310 |
+
"learning_rate": 4.642751958532132e-05,
|
1311 |
+
"loss": 0.4204,
|
1312 |
+
"step": 9300
|
1313 |
+
},
|
1314 |
+
{
|
1315 |
+
"epoch": 0.16522061811948896,
|
1316 |
+
"grad_norm": 4.992434978485107,
|
1317 |
+
"learning_rate": 4.637843356698278e-05,
|
1318 |
+
"loss": 0.5576,
|
1319 |
+
"step": 9350
|
1320 |
+
},
|
1321 |
+
{
|
1322 |
+
"epoch": 0.1661041508367055,
|
1323 |
+
"grad_norm": 1.673086166381836,
|
1324 |
+
"learning_rate": 4.6329347548644245e-05,
|
1325 |
+
"loss": 0.4712,
|
1326 |
+
"step": 9400
|
1327 |
}
|
1328 |
],
|
1329 |
"logging_steps": 50,
|