Training in progress, step 120000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:542104b06e65c9ace54b8e711bbac9cc621773a58de8d01b41cd5415fe72b366
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b30fa6c8a84af0d7218464b722bd9fc5359deac800cc03a88336084563eec6c
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:253ec9051983bdc300e9d368605149ab9db6148a7cad5289347f4c3ed6bf0e0b
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d26a68a6639ccae89e06188f10a2fdceb2b6d290e074968df743779292dbaf14
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36d17390300a97335e809dda525ee1fa7b3c57afe94917c7ee65e3550c222c6f
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ab3d619cc75d610e67ba5c5005e0fdf4981eb47efeadbcf338f6a49009fb766
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87e2d1745c681c91159da5acdbfd8bc474c3ecf40e467f65b9d5603d68c91173
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2278aa08713ed016c36bd4379ecb42317a1cf7d42a036b9f5bd0ffaee3d8558c
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1386,6 +1386,66 @@
|
|
1386 |
"learning_rate": 0.0001325205707139655,
|
1387 |
"loss": 0.3467,
|
1388 |
"step": 115000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1389 |
}
|
1390 |
],
|
1391 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0425531914893615,
|
5 |
+
"global_step": 120000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1386 |
"learning_rate": 0.0001325205707139655,
|
1387 |
"loss": 0.3467,
|
1388 |
"step": 115000
|
1389 |
+
},
|
1390 |
+
{
|
1391 |
+
"epoch": 1.97,
|
1392 |
+
"learning_rate": 0.00013237492730973225,
|
1393 |
+
"loss": 0.3466,
|
1394 |
+
"step": 115500
|
1395 |
+
},
|
1396 |
+
{
|
1397 |
+
"epoch": 1.97,
|
1398 |
+
"learning_rate": 0.00013222876698611107,
|
1399 |
+
"loss": 0.3465,
|
1400 |
+
"step": 116000
|
1401 |
+
},
|
1402 |
+
{
|
1403 |
+
"epoch": 1.98,
|
1404 |
+
"learning_rate": 0.00013208209118564532,
|
1405 |
+
"loss": 0.346,
|
1406 |
+
"step": 116500
|
1407 |
+
},
|
1408 |
+
{
|
1409 |
+
"epoch": 1.99,
|
1410 |
+
"learning_rate": 0.000131934901355966,
|
1411 |
+
"loss": 0.3463,
|
1412 |
+
"step": 117000
|
1413 |
+
},
|
1414 |
+
{
|
1415 |
+
"epoch": 2.0,
|
1416 |
+
"learning_rate": 0.00013178719894977722,
|
1417 |
+
"loss": 0.3461,
|
1418 |
+
"step": 117500
|
1419 |
+
},
|
1420 |
+
{
|
1421 |
+
"epoch": 2.01,
|
1422 |
+
"learning_rate": 0.0001316392823610171,
|
1423 |
+
"loss": 0.3459,
|
1424 |
+
"step": 118000
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"epoch": 2.02,
|
1428 |
+
"learning_rate": 0.00013149085814998288,
|
1429 |
+
"loss": 0.3459,
|
1430 |
+
"step": 118500
|
1431 |
+
},
|
1432 |
+
{
|
1433 |
+
"epoch": 2.03,
|
1434 |
+
"learning_rate": 0.00013134192777570792,
|
1435 |
+
"loss": 0.3459,
|
1436 |
+
"step": 119000
|
1437 |
+
},
|
1438 |
+
{
|
1439 |
+
"epoch": 2.03,
|
1440 |
+
"learning_rate": 0.00013119219272737532,
|
1441 |
+
"loss": 0.3455,
|
1442 |
+
"step": 119500
|
1443 |
+
},
|
1444 |
+
{
|
1445 |
+
"epoch": 2.04,
|
1446 |
+
"learning_rate": 0.00013104195243276762,
|
1447 |
+
"loss": 0.3452,
|
1448 |
+
"step": 120000
|
1449 |
}
|
1450 |
],
|
1451 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b30fa6c8a84af0d7218464b722bd9fc5359deac800cc03a88336084563eec6c
|
3 |
size 201355195
|