Training in progress, step 4000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +37 -5
- pytorch_model.bin +1 -1
- stderr.slurm +0 -0
- stdout.slurm +5 -0
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c18ec9ae19fb67dde669cb881dc949405131436f9cc1761e33a79f52d52e874a
|
3 |
size 995604017
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510396521
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f42fe0c2ec41d656453cec73abd968b21f89daa58209662a04075026845f4c5d
|
3 |
size 510396521
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:430f1248620a7a9f128f21dc3d873f638e60a994e72fbe17888d55a4b3b61863
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b94329f00f4cdc9834d8c689e9e9178c26b5c51fa127b64c75940601b3f9f205
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 4.
|
3 |
-
"best_model_checkpoint": "./ES_corlec/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -38,11 +38,43 @@
|
|
38 |
"eval_samples_per_second": 42.075,
|
39 |
"eval_steps_per_second": 2.631,
|
40 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
}
|
42 |
],
|
43 |
"max_steps": 35308,
|
44 |
"num_train_epochs": 7,
|
45 |
-
"total_flos":
|
46 |
"trial_name": null,
|
47 |
"trial_params": null
|
48 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 4.000585079193115,
|
3 |
+
"best_model_checkpoint": "./ES_corlec/checkpoint-4000",
|
4 |
+
"epoch": 0.7930214115781126,
|
5 |
+
"global_step": 4000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
38 |
"eval_samples_per_second": 42.075,
|
39 |
"eval_steps_per_second": 2.631,
|
40 |
"step": 2000
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 0.5,
|
44 |
+
"learning_rate": 9.344878660134443e-07,
|
45 |
+
"loss": 4.0586,
|
46 |
+
"step": 2500
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"epoch": 0.59,
|
50 |
+
"learning_rate": 9.202460977554973e-07,
|
51 |
+
"loss": 4.022,
|
52 |
+
"step": 3000
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 0.69,
|
56 |
+
"learning_rate": 9.060043294975503e-07,
|
57 |
+
"loss": 4.0038,
|
58 |
+
"step": 3500
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.79,
|
62 |
+
"learning_rate": 8.917625612396035e-07,
|
63 |
+
"loss": 3.9852,
|
64 |
+
"step": 4000
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"epoch": 0.79,
|
68 |
+
"eval_loss": 4.000585079193115,
|
69 |
+
"eval_runtime": 962.3175,
|
70 |
+
"eval_samples_per_second": 42.066,
|
71 |
+
"eval_steps_per_second": 2.63,
|
72 |
+
"step": 4000
|
73 |
}
|
74 |
],
|
75 |
"max_steps": 35308,
|
76 |
"num_train_epochs": 7,
|
77 |
+
"total_flos": 4899225600000000.0,
|
78 |
"trial_name": null,
|
79 |
"trial_params": null
|
80 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510396521
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f42fe0c2ec41d656453cec73abd968b21f89daa58209662a04075026845f4c5d
|
3 |
size 510396521
|
stderr.slurm
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
stdout.slurm
CHANGED
@@ -2,3 +2,8 @@
|
|
2 |
{'loss': 4.1911, 'learning_rate': 9.77213170787285e-07, 'epoch': 0.2}
|
3 |
{'loss': 4.1368, 'learning_rate': 9.62971402529338e-07, 'epoch': 0.3}
|
4 |
{'loss': 4.0957, 'learning_rate': 9.487296342713911e-07, 'epoch': 0.4}
|
|
|
|
|
|
|
|
|
|
|
|
2 |
{'loss': 4.1911, 'learning_rate': 9.77213170787285e-07, 'epoch': 0.2}
|
3 |
{'loss': 4.1368, 'learning_rate': 9.62971402529338e-07, 'epoch': 0.3}
|
4 |
{'loss': 4.0957, 'learning_rate': 9.487296342713911e-07, 'epoch': 0.4}
|
5 |
+
{'eval_loss': 4.06580114364624, 'eval_runtime': 962.1135, 'eval_samples_per_second': 42.075, 'eval_steps_per_second': 2.631, 'epoch': 0.4}
|
6 |
+
{'loss': 4.0586, 'learning_rate': 9.344878660134443e-07, 'epoch': 0.5}
|
7 |
+
{'loss': 4.022, 'learning_rate': 9.202460977554973e-07, 'epoch': 0.59}
|
8 |
+
{'loss': 4.0038, 'learning_rate': 9.060043294975503e-07, 'epoch': 0.69}
|
9 |
+
{'loss': 3.9852, 'learning_rate': 8.917625612396035e-07, 'epoch': 0.79}
|