Training in progress, step 24000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +51 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- runs/Jun18_17-52-03_85d30a282e1b/1687112276.514311/events.out.tfevents.1687112276.85d30a282e1b.283.1 +3 -0
- runs/Jun18_17-52-03_85d30a282e1b/events.out.tfevents.1687112276.85d30a282e1b.283.0 +3 -0
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3871543575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9436f8f8b607b1ee593a8f3b0b8b83e30fe390053f20fd17d06fad5fa1c3800
|
3 |
size 3871543575
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a6ca84a97e8c4a7948040e06ceceb51817c0b2cbd17f232026f4dbe0b459e8f
|
3 |
size 1944201353
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d8f258a77a5626bb48a10d9b85a8386b171d5dd6f4cd303281f19063ab68e5f
|
3 |
size 14575
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f431083eeb6d9fc00bf62f51e90febacbb78f3e6a99dc9fc4278d2be5a055df8
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4a6d62e8eba1babe503704efa95eb56e2c5e9222374877c14b70b7d65166a96
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -246,11 +246,59 @@
|
|
246 |
"learning_rate": 2.3109751299304977e-05,
|
247 |
"loss": 1.41,
|
248 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
}
|
250 |
],
|
251 |
"max_steps": 38148,
|
252 |
"num_train_epochs": 1,
|
253 |
-
"total_flos": 2.
|
254 |
"trial_name": null,
|
255 |
"trial_params": null
|
256 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6291220601650397,
|
5 |
+
"global_step": 24000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
246 |
"learning_rate": 2.3109751299304977e-05,
|
247 |
"loss": 1.41,
|
248 |
"step": 20000
|
249 |
+
},
|
250 |
+
{
|
251 |
+
"epoch": 0.54,
|
252 |
+
"learning_rate": 2.208518091776931e-05,
|
253 |
+
"loss": 1.3994,
|
254 |
+
"step": 20500
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.55,
|
258 |
+
"learning_rate": 2.1065551895687553e-05,
|
259 |
+
"loss": 1.402,
|
260 |
+
"step": 21000
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 0.56,
|
264 |
+
"learning_rate": 2.0054610881655616e-05,
|
265 |
+
"loss": 1.3803,
|
266 |
+
"step": 21500
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"epoch": 0.58,
|
270 |
+
"learning_rate": 1.9050020383592233e-05,
|
271 |
+
"loss": 1.396,
|
272 |
+
"step": 22000
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"epoch": 0.59,
|
276 |
+
"learning_rate": 1.8055516613795243e-05,
|
277 |
+
"loss": 1.3896,
|
278 |
+
"step": 22500
|
279 |
+
},
|
280 |
+
{
|
281 |
+
"epoch": 0.6,
|
282 |
+
"learning_rate": 1.707278550901278e-05,
|
283 |
+
"loss": 1.3721,
|
284 |
+
"step": 23000
|
285 |
+
},
|
286 |
+
{
|
287 |
+
"epoch": 0.62,
|
288 |
+
"learning_rate": 1.610349304833249e-05,
|
289 |
+
"loss": 1.3559,
|
290 |
+
"step": 23500
|
291 |
+
},
|
292 |
+
{
|
293 |
+
"epoch": 0.63,
|
294 |
+
"learning_rate": 1.5149282428923398e-05,
|
295 |
+
"loss": 1.3745,
|
296 |
+
"step": 24000
|
297 |
}
|
298 |
],
|
299 |
"max_steps": 38148,
|
300 |
"num_train_epochs": 1,
|
301 |
+
"total_flos": 2.6437433869983744e+16,
|
302 |
"trial_name": null,
|
303 |
"trial_params": null
|
304 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:199b3c969edf26c24dbc136ed11ffce7b4ba6fca136ae73bf4ac87b279d10612
|
3 |
size 3771
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a6ca84a97e8c4a7948040e06ceceb51817c0b2cbd17f232026f4dbe0b459e8f
|
3 |
size 1944201353
|
runs/Jun18_17-52-03_85d30a282e1b/1687112276.514311/events.out.tfevents.1687112276.85d30a282e1b.283.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc065b4819b65fbe0c37ed9ed822a534caee708e5201bd6110c8792e73e3fc0f
|
3 |
+
size 6187
|
runs/Jun18_17-52-03_85d30a282e1b/events.out.tfevents.1687112276.85d30a282e1b.283.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e58edc503b7e7e09ead300ec65b6ae8e84d0206ee2b89ccb7ed0d3fed326b214
|
3 |
+
size 5742
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:199b3c969edf26c24dbc136ed11ffce7b4ba6fca136ae73bf4ac87b279d10612
|
3 |
size 3771
|