Training in progress, step 115000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a96985558aa5baa3768dacbd21de519d7edb12c54d55f8abd299a3b320acbb7d
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc88d694ab43b269923f499ae0a196ce2b321aa43a08b0d209b0e1477245f397
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07a7dac0331a08ec7b759a3fbdf60d9ea4d23459b88bddb43ada180d7867c450
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24efc479c2253a8a21ba1161756cb8b2bf35ec1aa44838afa8b2146dfd677c46
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:278b91d2207fc5bd9fc791eaad3ba2059b2b40bb15ac1b33c3d8d26a2480b4c6
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9265d98dfcf21504577db90016b54e00c9a802b755b13a7cf3007e368c828843
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e069c734b57a08e7fd480b498687186c300b89341d1abd5cdeb4b7da729f2061
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b20222b237166da7dfe5fc6c7263cb379326118cd83ed4c2b38174be9bdc4ee6
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1326,6 +1326,66 @@
|
|
1326 |
"learning_rate": 0.00013394882101213176,
|
1327 |
"loss": 0.3497,
|
1328 |
"step": 110000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1329 |
}
|
1330 |
],
|
1331 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9574386600965097,
|
5 |
+
"global_step": 115000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1326 |
"learning_rate": 0.00013394882101213176,
|
1327 |
"loss": 0.3497,
|
1328 |
"step": 110000
|
1329 |
+
},
|
1330 |
+
{
|
1331 |
+
"epoch": 1.88,
|
1332 |
+
"learning_rate": 0.00013380842713932233,
|
1333 |
+
"loss": 0.3495,
|
1334 |
+
"step": 110500
|
1335 |
+
},
|
1336 |
+
{
|
1337 |
+
"epoch": 1.89,
|
1338 |
+
"learning_rate": 0.00013366750219906035,
|
1339 |
+
"loss": 0.3499,
|
1340 |
+
"step": 111000
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 1.9,
|
1344 |
+
"learning_rate": 0.00013352604758221816,
|
1345 |
+
"loss": 0.3494,
|
1346 |
+
"step": 111500
|
1347 |
+
},
|
1348 |
+
{
|
1349 |
+
"epoch": 1.91,
|
1350 |
+
"learning_rate": 0.00013338406468489563,
|
1351 |
+
"loss": 0.3499,
|
1352 |
+
"step": 112000
|
1353 |
+
},
|
1354 |
+
{
|
1355 |
+
"epoch": 1.91,
|
1356 |
+
"learning_rate": 0.0001332415549084067,
|
1357 |
+
"loss": 0.3496,
|
1358 |
+
"step": 112500
|
1359 |
+
},
|
1360 |
+
{
|
1361 |
+
"epoch": 1.92,
|
1362 |
+
"learning_rate": 0.0001330985196592653,
|
1363 |
+
"loss": 0.349,
|
1364 |
+
"step": 113000
|
1365 |
+
},
|
1366 |
+
{
|
1367 |
+
"epoch": 1.93,
|
1368 |
+
"learning_rate": 0.00013295496034917162,
|
1369 |
+
"loss": 0.3491,
|
1370 |
+
"step": 113500
|
1371 |
+
},
|
1372 |
+
{
|
1373 |
+
"epoch": 1.94,
|
1374 |
+
"learning_rate": 0.00013281087839499807,
|
1375 |
+
"loss": 0.3489,
|
1376 |
+
"step": 114000
|
1377 |
+
},
|
1378 |
+
{
|
1379 |
+
"epoch": 1.95,
|
1380 |
+
"learning_rate": 0.00013266627521877535,
|
1381 |
+
"loss": 0.3485,
|
1382 |
+
"step": 114500
|
1383 |
+
},
|
1384 |
+
{
|
1385 |
+
"epoch": 1.96,
|
1386 |
+
"learning_rate": 0.0001325211522476785,
|
1387 |
+
"loss": 0.3487,
|
1388 |
+
"step": 115000
|
1389 |
}
|
1390 |
],
|
1391 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc88d694ab43b269923f499ae0a196ce2b321aa43a08b0d209b0e1477245f397
|
3 |
size 201355195
|