mprzibilla
committed on
Commit
•
850d02e
1
Parent(s):
5efcd40
Training in progress, epoch 9
Browse files
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +20 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 721655813
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea7bd9ce5d076e1d98b64a658583e871dc04d7a86c8ca33bc981277cc9fb505a
|
3 |
size 721655813
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 377643361
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11917b780f45b92bb07a8a1c486fe3d266711b5787267bcb29c4826f2b2058e7
|
3 |
size 377643361
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cecab2a299f97e539430f47e61a6d993f61fb978764f9d37e71128ad8d33820
|
3 |
size 14639
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd49b5262d67edc30329678d9505044ee97620da0bca221fad9d74670af91f12
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f72648033857a02de092400c8fda13cd35b3981f24cef90f1182b38e594440ce
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -142,11 +142,28 @@
|
|
142 |
"eval_samples_per_second": 26.443,
|
143 |
"eval_steps_per_second": 3.379,
|
144 |
"step": 2208
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
}
|
146 |
],
|
147 |
"max_steps": 4140,
|
148 |
"num_train_epochs": 15,
|
149 |
-
"total_flos": 1.
|
150 |
"trial_name": null,
|
151 |
"trial_params": null
|
152 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.0,
|
5 |
+
"global_step": 2484,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
142 |
"eval_samples_per_second": 26.443,
|
143 |
"eval_steps_per_second": 3.379,
|
144 |
"step": 2208
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"epoch": 9.0,
|
148 |
+
"learning_rate": 4.2283244342740914e-05,
|
149 |
+
"loss": 1.297,
|
150 |
+
"step": 2484
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"epoch": 9.0,
|
154 |
+
"eval_cer": 0.7525252525252525,
|
155 |
+
"eval_loss": 2.8890116214752197,
|
156 |
+
"eval_new_wer": 0.5611111111111111,
|
157 |
+
"eval_old_wer": 1.0,
|
158 |
+
"eval_runtime": 6.7075,
|
159 |
+
"eval_samples_per_second": 26.836,
|
160 |
+
"eval_steps_per_second": 3.429,
|
161 |
+
"step": 2484
|
162 |
}
|
163 |
],
|
164 |
"max_steps": 4140,
|
165 |
"num_train_epochs": 15,
|
166 |
+
"total_flos": 1.806990580084416e+18,
|
167 |
"trial_name": null,
|
168 |
"trial_params": null
|
169 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 377643361
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11917b780f45b92bb07a8a1c486fe3d266711b5787267bcb29c4826f2b2058e7
|
3 |
size 377643361
|