Training in progress, step 15000, checkpoint
Browse files- last-checkpoint/global_step15000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step15000/zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step15000/zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step15000/zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step15000/zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step15000/zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step15000/zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -3
last-checkpoint/global_step15000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fab0a63b3ac16d6f94d5f0f35a8502fdc5bcc8b548d6aad2c7d44d818ba8b498
|
3 |
+
size 197282509
|
last-checkpoint/global_step15000/zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:419156b9b803fc2e093681e99198c30c3374b4ab2c00b50d36abe443fd2e571d
|
3 |
+
size 180416968
|
last-checkpoint/global_step15000/zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0d28cbb974efed0531d71b009d0cce220c7a463f2932322c26fdd886eb76612
|
3 |
+
size 180416776
|
last-checkpoint/global_step15000/zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c14969eec15404f14580812284af2f9dfdaf3d876b031afdb33a017e8ea88f83
|
3 |
+
size 180416776
|
last-checkpoint/global_step15000/zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bec6a62d1338f558c492ba7f375dca5db9d73f85ce67c1225eb6414af7b4c19
|
3 |
+
size 180416904
|
last-checkpoint/global_step15000/zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba49ad34558a56e4299a444243b9bfd826b1ab2e1f2cfe0b7d60837554b2cc3f
|
3 |
+
size 180416712
|
last-checkpoint/global_step15000/zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05de279c3af47689f94d74717c0d293c73f5846394ddb9c7de03c96cb220e28f
|
3 |
+
size 180417096
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step15000
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 188836816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7751a0b7a8cc9049584600df855e963a7058189eba36e4a9a6a6c3bf0dcd0a5a
|
3 |
size 188836816
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6999d7fefb97e6a9a099cd30916802bc69ef7a40283d48060dd44d4594d50081
|
3 |
+
size 15536
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcab6b00011077d0bd66c568639f31df0fa198017171e68b52aea1079faea821
|
3 |
size 15536
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15472
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09b0ae0abb6b790ed8ac27b65cea139e3676b521cf203d1e2285b653dd58c7fb
|
3 |
size 15472
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a4137a91687576c9e4b6b3a040c0affa99daa815a899287efc0849b18471f66
|
3 |
size 15536
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:047b316b318f4373add1f5f3e4149c3d9b4a629a8627239189aa5dd5bb6920b3
|
3 |
size 15536
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9297cb3deb97e247f257c18cfcbeb0473ce2a067efd2926a19f8087aa5515f2c
|
3 |
+
size 15472
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf09ca0b8297e8f1a255617385be78112ae10fd31037fd3bd56d97071a010fe5
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2109,6 +2109,85 @@
|
|
2109 |
"learning_rate": 0.0001,
|
2110 |
"loss": 0.0138,
|
2111 |
"step": 14500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2112 |
}
|
2113 |
],
|
2114 |
"logging_steps": 50,
|
@@ -2128,7 +2207,7 @@
|
|
2128 |
"attributes": {}
|
2129 |
}
|
2130 |
},
|
2131 |
-
"total_flos": 4.
|
2132 |
"train_batch_size": 64,
|
2133 |
"trial_name": null,
|
2134 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 405.4054054054054,
|
5 |
"eval_steps": 1500,
|
6 |
+
"global_step": 15000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2109 |
"learning_rate": 0.0001,
|
2110 |
"loss": 0.0138,
|
2111 |
"step": 14500
|
2112 |
+
},
|
2113 |
+
{
|
2114 |
+
"epoch": 393.2432432432432,
|
2115 |
+
"grad_norm": 0.23838981986045837,
|
2116 |
+
"learning_rate": 0.0001,
|
2117 |
+
"loss": 0.0137,
|
2118 |
+
"step": 14550
|
2119 |
+
},
|
2120 |
+
{
|
2121 |
+
"epoch": 394.5945945945946,
|
2122 |
+
"grad_norm": 0.2961066663265228,
|
2123 |
+
"learning_rate": 0.0001,
|
2124 |
+
"loss": 0.0136,
|
2125 |
+
"step": 14600
|
2126 |
+
},
|
2127 |
+
{
|
2128 |
+
"epoch": 395.94594594594594,
|
2129 |
+
"grad_norm": 0.24311979115009308,
|
2130 |
+
"learning_rate": 0.0001,
|
2131 |
+
"loss": 0.0133,
|
2132 |
+
"step": 14650
|
2133 |
+
},
|
2134 |
+
{
|
2135 |
+
"epoch": 397.2972972972973,
|
2136 |
+
"grad_norm": 0.3343033492565155,
|
2137 |
+
"learning_rate": 0.0001,
|
2138 |
+
"loss": 0.0138,
|
2139 |
+
"step": 14700
|
2140 |
+
},
|
2141 |
+
{
|
2142 |
+
"epoch": 398.64864864864865,
|
2143 |
+
"grad_norm": 0.23256798088550568,
|
2144 |
+
"learning_rate": 0.0001,
|
2145 |
+
"loss": 0.0133,
|
2146 |
+
"step": 14750
|
2147 |
+
},
|
2148 |
+
{
|
2149 |
+
"epoch": 400.0,
|
2150 |
+
"grad_norm": 0.31679514050483704,
|
2151 |
+
"learning_rate": 0.0001,
|
2152 |
+
"loss": 0.0131,
|
2153 |
+
"step": 14800
|
2154 |
+
},
|
2155 |
+
{
|
2156 |
+
"epoch": 401.35135135135135,
|
2157 |
+
"grad_norm": 0.24046526849269867,
|
2158 |
+
"learning_rate": 0.0001,
|
2159 |
+
"loss": 0.0115,
|
2160 |
+
"step": 14850
|
2161 |
+
},
|
2162 |
+
{
|
2163 |
+
"epoch": 402.7027027027027,
|
2164 |
+
"grad_norm": 0.2563251852989197,
|
2165 |
+
"learning_rate": 0.0001,
|
2166 |
+
"loss": 0.0121,
|
2167 |
+
"step": 14900
|
2168 |
+
},
|
2169 |
+
{
|
2170 |
+
"epoch": 404.05405405405406,
|
2171 |
+
"grad_norm": 0.18860304355621338,
|
2172 |
+
"learning_rate": 0.0001,
|
2173 |
+
"loss": 0.0118,
|
2174 |
+
"step": 14950
|
2175 |
+
},
|
2176 |
+
{
|
2177 |
+
"epoch": 405.4054054054054,
|
2178 |
+
"grad_norm": 0.27949538826942444,
|
2179 |
+
"learning_rate": 0.0001,
|
2180 |
+
"loss": 0.0117,
|
2181 |
+
"step": 15000
|
2182 |
+
},
|
2183 |
+
{
|
2184 |
+
"epoch": 405.4054054054054,
|
2185 |
+
"eval_loss": 0.7209838628768921,
|
2186 |
+
"eval_runtime": 55.6252,
|
2187 |
+
"eval_samples_per_second": 28.171,
|
2188 |
+
"eval_steps_per_second": 0.09,
|
2189 |
+
"eval_wer": 0.19634897049458713,
|
2190 |
+
"step": 15000
|
2191 |
}
|
2192 |
],
|
2193 |
"logging_steps": 50,
|
|
|
2207 |
"attributes": {}
|
2208 |
}
|
2209 |
},
|
2210 |
+
"total_flos": 4.210588042772464e+20,
|
2211 |
"train_batch_size": 64,
|
2212 |
"trial_name": null,
|
2213 |
"trial_params": null
|