Training in progress, step 115000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6696fd51101b8b446f97216efd740cd8dca5d9899e0d5608d38ebe68564124a6
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7acfbb9356f03261bd2877d45ab22ef9c02da59569f9db1f82891de3e3ae482
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81331f05aa1795f889cfd2b40ff219c8fdff948e6277fe922aebc65f3771eb3e
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:835008d44ba9a14490f4b13cec08e9910d2ee730477121882483fcfc9d2b2ed5
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:651f1774f8cb4443ce02decd81a9ba1b5650a832c53efc73ebe45a8a355aa1ec
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:191e05698666853bac5bf2a9701e521f403845294f32a8b3f961ab6ebb657230
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e9212b05900598668aa51bd4ae0e5835c73f0c086f9c7ff5d685a80972d182f
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:645fabc4590deab471370d6d5d23c19db281be3849642e6d5786d8be30996648
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1326,6 +1326,66 @@
|
|
1326 |
"learning_rate": 0.00013394713943927426,
|
1327 |
"loss": 0.3475,
|
1328 |
"step": 110000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1329 |
}
|
1330 |
],
|
1331 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9574468085106385,
|
5 |
+
"global_step": 115000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1326 |
"learning_rate": 0.00013394713943927426,
|
1327 |
"loss": 0.3475,
|
1328 |
"step": 110000
|
1329 |
+
},
|
1330 |
+
{
|
1331 |
+
"epoch": 1.88,
|
1332 |
+
"learning_rate": 0.00013380702051639005,
|
1333 |
+
"loss": 0.3475,
|
1334 |
+
"step": 110500
|
1335 |
+
},
|
1336 |
+
{
|
1337 |
+
"epoch": 1.89,
|
1338 |
+
"learning_rate": 0.00013366637266202443,
|
1339 |
+
"loss": 0.3476,
|
1340 |
+
"step": 111000
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 1.9,
|
1344 |
+
"learning_rate": 0.00013352491381339167,
|
1345 |
+
"loss": 0.3473,
|
1346 |
+
"step": 111500
|
1347 |
+
},
|
1348 |
+
{
|
1349 |
+
"epoch": 1.91,
|
1350 |
+
"learning_rate": 0.00013338292669546847,
|
1351 |
+
"loss": 0.3472,
|
1352 |
+
"step": 112000
|
1353 |
+
},
|
1354 |
+
{
|
1355 |
+
"epoch": 1.91,
|
1356 |
+
"learning_rate": 0.00013324041270961035,
|
1357 |
+
"loss": 0.3471,
|
1358 |
+
"step": 112500
|
1359 |
+
},
|
1360 |
+
{
|
1361 |
+
"epoch": 1.92,
|
1362 |
+
"learning_rate": 0.0001330979464650115,
|
1363 |
+
"loss": 0.347,
|
1364 |
+
"step": 113000
|
1365 |
+
},
|
1366 |
+
{
|
1367 |
+
"epoch": 1.93,
|
1368 |
+
"learning_rate": 0.00013295438506151565,
|
1369 |
+
"loss": 0.3468,
|
1370 |
+
"step": 113500
|
1371 |
+
},
|
1372 |
+
{
|
1373 |
+
"epoch": 1.94,
|
1374 |
+
"learning_rate": 0.00013281030101961779,
|
1375 |
+
"loss": 0.3472,
|
1376 |
+
"step": 114000
|
1377 |
+
},
|
1378 |
+
{
|
1379 |
+
"epoch": 1.95,
|
1380 |
+
"learning_rate": 0.00013266569576136922,
|
1381 |
+
"loss": 0.3476,
|
1382 |
+
"step": 114500
|
1383 |
+
},
|
1384 |
+
{
|
1385 |
+
"epoch": 1.96,
|
1386 |
+
"learning_rate": 0.0001325205707139655,
|
1387 |
+
"loss": 0.3467,
|
1388 |
+
"step": 115000
|
1389 |
}
|
1390 |
],
|
1391 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7acfbb9356f03261bd2877d45ab22ef9c02da59569f9db1f82891de3e3ae482
|
3 |
size 201355195
|