Training in progress, step 30000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893438545
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4cc35e78f4b2875a4e7bc2823bb00b03a35ef0a895f4019b9ee2bc553e87589
|
3 |
size 893438545
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72eceef87bfa3bd9a4ae124be6a652f67324b3717ebee85984495f36f82da2ae
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5fa93e01617d6f205944a831ee335dee07ebb08b04eafcf9562f4cb23a4302a
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39fcc0dca710a70aacc882ac08d1de25fb93138ed23a4e1e7de926a24206dd2c
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcc311eb216d0f91d79274b66b1cc6a0546d030081f7d13ccdc6a5a80b2096b7
|
3 |
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10eef73b54f0ee2610594db97b5e21337619a39459c4b5d554e4fc3069d6003e
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0eff7bfd19bb5de4a804b312a4895c5e9ec017a31baa7a463d8d86ec7115b34c
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -154,11 +154,85 @@
|
|
154 |
"eval_samples_per_second": 982.441,
|
155 |
"eval_steps_per_second": 15.719,
|
156 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
}
|
158 |
],
|
159 |
"max_steps": 1000000,
|
160 |
"num_train_epochs": 16,
|
161 |
-
"total_flos":
|
162 |
"trial_name": null,
|
163 |
"trial_params": null
|
164 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.4581061890146136,
|
5 |
+
"global_step": 30000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
154 |
"eval_samples_per_second": 982.441,
|
155 |
"eval_steps_per_second": 15.719,
|
156 |
"step": 20000
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.32,
|
160 |
+
"learning_rate": 6.299999999999999e-05,
|
161 |
+
"loss": 0.6074,
|
162 |
+
"step": 21000
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"epoch": 0.34,
|
166 |
+
"learning_rate": 6.599999999999999e-05,
|
167 |
+
"loss": 0.6039,
|
168 |
+
"step": 22000
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"epoch": 0.35,
|
172 |
+
"learning_rate": 6.9e-05,
|
173 |
+
"loss": 0.6005,
|
174 |
+
"step": 23000
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"epoch": 0.37,
|
178 |
+
"learning_rate": 7.199999999999999e-05,
|
179 |
+
"loss": 0.5968,
|
180 |
+
"step": 24000
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"epoch": 0.38,
|
184 |
+
"learning_rate": 7.5e-05,
|
185 |
+
"loss": 0.5932,
|
186 |
+
"step": 25000
|
187 |
+
},
|
188 |
+
{
|
189 |
+
"epoch": 0.38,
|
190 |
+
"eval_runtime": 1.1249,
|
191 |
+
"eval_samples_per_second": 888.989,
|
192 |
+
"eval_steps_per_second": 14.224,
|
193 |
+
"step": 25000
|
194 |
+
},
|
195 |
+
{
|
196 |
+
"epoch": 0.4,
|
197 |
+
"learning_rate": 7.8e-05,
|
198 |
+
"loss": 0.5912,
|
199 |
+
"step": 26000
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"epoch": 0.41,
|
203 |
+
"learning_rate": 8.1e-05,
|
204 |
+
"loss": 0.58,
|
205 |
+
"step": 27000
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.43,
|
209 |
+
"learning_rate": 8.4e-05,
|
210 |
+
"loss": 0.5698,
|
211 |
+
"step": 28000
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"epoch": 0.44,
|
215 |
+
"learning_rate": 8.699999999999999e-05,
|
216 |
+
"loss": 0.5639,
|
217 |
+
"step": 29000
|
218 |
+
},
|
219 |
+
{
|
220 |
+
"epoch": 0.46,
|
221 |
+
"learning_rate": 8.999999999999999e-05,
|
222 |
+
"loss": 0.5601,
|
223 |
+
"step": 30000
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"epoch": 0.46,
|
227 |
+
"eval_runtime": 1.0096,
|
228 |
+
"eval_samples_per_second": 990.512,
|
229 |
+
"eval_steps_per_second": 15.848,
|
230 |
+
"step": 30000
|
231 |
}
|
232 |
],
|
233 |
"max_steps": 1000000,
|
234 |
"num_train_epochs": 16,
|
235 |
+
"total_flos": 2.1030078309104144e+21,
|
236 |
"trial_name": null,
|
237 |
"trial_params": null
|
238 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72eceef87bfa3bd9a4ae124be6a652f67324b3717ebee85984495f36f82da2ae
|
3 |
size 449471589
|