Training in progress, step 140000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:541c09487d9370b723d6f46473ad77d95854f9f277155774d36540daed587c20
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29c9ea605b07acd9d6d6f974a1449b64b38a1f91d2d0376e5cf078cd024be1d7
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b84e894ef7a6033cd0e73130f2ebea7e9ec1ffbcfd8c8b79709a364acfd0ff22
|
3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:846e21193961d3c01650602fd61d067e6d15bb106fa27a0a886ebc7bc071a463
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3963fd41f8e5a2e50a3c7c43d6a3b5ff095e754a9f186db5cd3c2302fcde5d99
|
3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14ea5cea28ed719f191afec655c9dab07c0fb35f9db9c4929b4e91078fef40d5
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5024aa92c1c16069c6562483f940b6c61d0a6604a0773cc4bdb5211b70f77f4
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -968,11 +968,85 @@
|
|
968 |
"eval_samples_per_second": 991.553,
|
969 |
"eval_steps_per_second": 15.865,
|
970 |
"step": 130000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
971 |
}
|
972 |
],
|
973 |
"max_steps": 1000000,
|
974 |
"num_train_epochs": 16,
|
975 |
-
"total_flos": 9.
|
976 |
"trial_name": null,
|
977 |
"trial_params": null
|
978 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.1378288820681965,
|
5 |
+
"global_step": 140000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
968 |
"eval_samples_per_second": 991.553,
|
969 |
"eval_steps_per_second": 15.865,
|
970 |
"step": 130000
|
971 |
+
},
|
972 |
+
{
|
973 |
+
"epoch": 2.0,
|
974 |
+
"learning_rate": 0.00014750372550514533,
|
975 |
+
"loss": 0.3409,
|
976 |
+
"step": 131000
|
977 |
+
},
|
978 |
+
{
|
979 |
+
"epoch": 2.02,
|
980 |
+
"learning_rate": 0.0001474420890809492,
|
981 |
+
"loss": 0.3401,
|
982 |
+
"step": 132000
|
983 |
+
},
|
984 |
+
{
|
985 |
+
"epoch": 2.03,
|
986 |
+
"learning_rate": 0.00014737971512087202,
|
987 |
+
"loss": 0.3396,
|
988 |
+
"step": 133000
|
989 |
+
},
|
990 |
+
{
|
991 |
+
"epoch": 2.05,
|
992 |
+
"learning_rate": 0.00014731660430702552,
|
993 |
+
"loss": 0.339,
|
994 |
+
"step": 134000
|
995 |
+
},
|
996 |
+
{
|
997 |
+
"epoch": 2.06,
|
998 |
+
"learning_rate": 0.00014725275732957937,
|
999 |
+
"loss": 0.3402,
|
1000 |
+
"step": 135000
|
1001 |
+
},
|
1002 |
+
{
|
1003 |
+
"epoch": 2.06,
|
1004 |
+
"eval_runtime": 1.1005,
|
1005 |
+
"eval_samples_per_second": 908.652,
|
1006 |
+
"eval_steps_per_second": 14.538,
|
1007 |
+
"step": 135000
|
1008 |
+
},
|
1009 |
+
{
|
1010 |
+
"epoch": 2.08,
|
1011 |
+
"learning_rate": 0.00014718817488675387,
|
1012 |
+
"loss": 0.3379,
|
1013 |
+
"step": 136000
|
1014 |
+
},
|
1015 |
+
{
|
1016 |
+
"epoch": 2.09,
|
1017 |
+
"learning_rate": 0.00014712285768481235,
|
1018 |
+
"loss": 0.3371,
|
1019 |
+
"step": 137000
|
1020 |
+
},
|
1021 |
+
{
|
1022 |
+
"epoch": 2.11,
|
1023 |
+
"learning_rate": 0.00014705680643805323,
|
1024 |
+
"loss": 0.3368,
|
1025 |
+
"step": 138000
|
1026 |
+
},
|
1027 |
+
{
|
1028 |
+
"epoch": 2.12,
|
1029 |
+
"learning_rate": 0.00014699002186880232,
|
1030 |
+
"loss": 0.3363,
|
1031 |
+
"step": 139000
|
1032 |
+
},
|
1033 |
+
{
|
1034 |
+
"epoch": 2.14,
|
1035 |
+
"learning_rate": 0.00014692250470740503,
|
1036 |
+
"loss": 0.3361,
|
1037 |
+
"step": 140000
|
1038 |
+
},
|
1039 |
+
{
|
1040 |
+
"epoch": 2.14,
|
1041 |
+
"eval_runtime": 1.0104,
|
1042 |
+
"eval_samples_per_second": 989.716,
|
1043 |
+
"eval_steps_per_second": 15.835,
|
1044 |
+
"step": 140000
|
1045 |
}
|
1046 |
],
|
1047 |
"max_steps": 1000000,
|
1048 |
"num_train_epochs": 16,
|
1049 |
+
"total_flos": 9.814023402885625e+21,
|
1050 |
"trial_name": null,
|
1051 |
"trial_params": null
|
1052 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29c9ea605b07acd9d6d6f974a1449b64b38a1f91d2d0376e5cf078cd024be1d7
|
3 |
size 449471589
|