mprzibilla committed on
Commit
952478b
1 Parent(s): ffa0595

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "mprzibilla/large_base_M14",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
 
1
  {
2
+ "_name_or_path": "facebook/wav2vec2-base",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "mprzibilla/large_base_M14",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
 
1
  {
2
+ "_name_or_path": "facebook/wav2vec2-base",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2e35834074f19bc5222d3c8af123732ecaf310f7c9f23dccd514a0fefc4c57
3
- size 174443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213bb0e0a90d5954b503e688cc3c3a9aa15741ebd8a352784b1ba115f324844d
3
+ size 721661957
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fdd83bce0be94c1d45bb977937e9084dac2cb76e797eac4044daa3dc8639b22
3
  size 377646433
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c1450349389d08ad66823b39334b010395a34ccee54ae96db36aa7efca4b3e3
3
  size 377646433
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63d5ccf1b0ffdd4638e6e2647e36a4f32f89f322ea80cdd67e65f7a7c51f6a0c
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58da74621a4bbb5bbbe8b99cb57ffcf083b444d150e3a3a43ef88eb4f947ea0d
3
  size 14639
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:614dcd144644e40eeba8333744c7822a05c774de62a1e066c6cfdf0f6212d9b6
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3439e56a6be40162c72ef7587f90cdd2fae59b61ca49cc3743f83ca19cfdd8b
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b29d56619f19dd624dc3d9ce779248fcca781dca28f83547cd64f3f5a24ff43b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69329aa5f5ce6722e627e837927ad2e18f7a9b9c2acd8a4165059b6ad1f66c71
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,271 +1,33 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 64050,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.826288456481726e-05,
13
- "loss": 37.9017,
14
  "step": 4270
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_cer": 1.0,
19
- "eval_loss": 3.8080713748931885,
20
- "eval_new_wer": 1.0,
21
- "eval_old_wer": 1.0,
22
- "eval_runtime": 7.7466,
23
- "eval_samples_per_second": 27.109,
24
- "eval_steps_per_second": 3.485,
25
  "step": 4270
26
- },
27
- {
28
- "epoch": 2.0,
29
- "learning_rate": 9.124539836970814e-05,
30
- "loss": 3.3383,
31
- "step": 8540
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_cer": 1.0,
36
- "eval_loss": 3.6899821758270264,
37
- "eval_new_wer": 1.0,
38
- "eval_old_wer": 1.0,
39
- "eval_runtime": 7.6904,
40
- "eval_samples_per_second": 27.307,
41
- "eval_steps_per_second": 3.511,
42
- "step": 8540
43
- },
44
- {
45
- "epoch": 3.0,
46
- "learning_rate": 8.4227912174599e-05,
47
- "loss": 3.2564,
48
- "step": 12810
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_cer": 0.9982683982683983,
53
- "eval_loss": 3.388317823410034,
54
- "eval_new_wer": 1.0,
55
- "eval_old_wer": 1.0,
56
- "eval_runtime": 7.7217,
57
- "eval_samples_per_second": 27.196,
58
- "eval_steps_per_second": 3.497,
59
- "step": 12810
60
- },
61
- {
62
- "epoch": 4.0,
63
- "learning_rate": 7.721042597948987e-05,
64
- "loss": 3.104,
65
- "step": 17080
66
- },
67
- {
68
- "epoch": 4.0,
69
- "eval_cer": 0.9939393939393939,
70
- "eval_loss": 3.202686071395874,
71
- "eval_new_wer": 1.0,
72
- "eval_old_wer": 1.0,
73
- "eval_runtime": 7.9831,
74
- "eval_samples_per_second": 26.306,
75
- "eval_steps_per_second": 3.382,
76
- "step": 17080
77
- },
78
- {
79
- "epoch": 5.0,
80
- "learning_rate": 7.019622666316067e-05,
81
- "loss": 2.997,
82
- "step": 21350
83
- },
84
- {
85
- "epoch": 5.0,
86
- "eval_cer": 0.9904761904761905,
87
- "eval_loss": 3.177474021911621,
88
- "eval_new_wer": 0.9904761904761905,
89
- "eval_old_wer": 1.0,
90
- "eval_runtime": 7.7653,
91
- "eval_samples_per_second": 27.043,
92
- "eval_steps_per_second": 3.477,
93
- "step": 21350
94
- },
95
- {
96
- "epoch": 6.0,
97
- "learning_rate": 6.318202734683146e-05,
98
- "loss": 2.9391,
99
- "step": 25620
100
- },
101
- {
102
- "epoch": 6.0,
103
- "eval_cer": 0.9861471861471861,
104
- "eval_loss": 3.1568691730499268,
105
- "eval_new_wer": 0.9809523809523809,
106
- "eval_old_wer": 1.0,
107
- "eval_runtime": 7.7615,
108
- "eval_samples_per_second": 27.057,
109
- "eval_steps_per_second": 3.479,
110
- "step": 25620
111
- },
112
- {
113
- "epoch": 7.0,
114
- "learning_rate": 5.616782803050224e-05,
115
- "loss": 2.8989,
116
- "step": 29890
117
- },
118
- {
119
- "epoch": 7.0,
120
- "eval_cer": 0.9766233766233766,
121
- "eval_loss": 3.164285182952881,
122
- "eval_new_wer": 0.9714285714285714,
123
- "eval_old_wer": 1.0,
124
- "eval_runtime": 7.8832,
125
- "eval_samples_per_second": 26.639,
126
- "eval_steps_per_second": 3.425,
127
- "step": 29890
128
- },
129
- {
130
- "epoch": 8.0,
131
- "learning_rate": 4.9153628714173024e-05,
132
- "loss": 2.8731,
133
- "step": 34160
134
- },
135
- {
136
- "epoch": 8.0,
137
- "eval_cer": 0.9670995670995671,
138
- "eval_loss": 3.1436803340911865,
139
- "eval_new_wer": 0.9523809523809523,
140
- "eval_old_wer": 1.0,
141
- "eval_runtime": 7.7748,
142
- "eval_samples_per_second": 27.01,
143
- "eval_steps_per_second": 3.473,
144
- "step": 34160
145
- },
146
- {
147
- "epoch": 9.0,
148
- "learning_rate": 4.213778595845386e-05,
149
- "loss": 2.8549,
150
- "step": 38430
151
- },
152
- {
153
- "epoch": 9.0,
154
- "eval_cer": 0.9645021645021645,
155
- "eval_loss": 3.1454625129699707,
156
- "eval_new_wer": 0.9428571428571428,
157
- "eval_old_wer": 1.0,
158
- "eval_runtime": 7.8309,
159
- "eval_samples_per_second": 26.817,
160
- "eval_steps_per_second": 3.448,
161
- "step": 38430
162
- },
163
- {
164
- "epoch": 10.0,
165
- "learning_rate": 3.512358664212464e-05,
166
- "loss": 2.8407,
167
- "step": 42700
168
- },
169
- {
170
- "epoch": 10.0,
171
- "eval_cer": 0.9627705627705627,
172
- "eval_loss": 3.1407470703125,
173
- "eval_new_wer": 0.9380952380952381,
174
- "eval_old_wer": 1.0,
175
- "eval_runtime": 7.787,
176
- "eval_samples_per_second": 26.968,
177
- "eval_steps_per_second": 3.467,
178
- "step": 42700
179
- },
180
- {
181
- "epoch": 11.0,
182
- "learning_rate": 2.8107743886405467e-05,
183
- "loss": 2.8301,
184
- "step": 46970
185
- },
186
- {
187
- "epoch": 11.0,
188
- "eval_cer": 0.9601731601731601,
189
- "eval_loss": 3.141080141067505,
190
- "eval_new_wer": 0.9333333333333333,
191
- "eval_old_wer": 1.0,
192
- "eval_runtime": 7.8547,
193
- "eval_samples_per_second": 26.735,
194
- "eval_steps_per_second": 3.437,
195
- "step": 46970
196
- },
197
- {
198
- "epoch": 12.0,
199
- "learning_rate": 2.1093544570076258e-05,
200
- "loss": 2.8215,
201
- "step": 51240
202
- },
203
- {
204
- "epoch": 12.0,
205
- "eval_cer": 0.9601731601731601,
206
- "eval_loss": 3.1361286640167236,
207
- "eval_new_wer": 0.9380952380952381,
208
- "eval_old_wer": 1.0,
209
- "eval_runtime": 7.7971,
210
- "eval_samples_per_second": 26.933,
211
- "eval_steps_per_second": 3.463,
212
- "step": 51240
213
- },
214
- {
215
- "epoch": 13.0,
216
- "learning_rate": 1.4079345253747043e-05,
217
- "loss": 2.8178,
218
- "step": 55510
219
- },
220
- {
221
- "epoch": 13.0,
222
- "eval_cer": 0.9567099567099567,
223
- "eval_loss": 3.1265242099761963,
224
- "eval_new_wer": 0.9285714285714286,
225
- "eval_old_wer": 1.0,
226
- "eval_runtime": 7.8518,
227
- "eval_samples_per_second": 26.746,
228
- "eval_steps_per_second": 3.439,
229
- "step": 55510
230
- },
231
- {
232
- "epoch": 14.0,
233
- "learning_rate": 7.065145937417829e-06,
234
- "loss": 2.816,
235
- "step": 59780
236
- },
237
- {
238
- "epoch": 14.0,
239
- "eval_cer": 0.9575757575757575,
240
- "eval_loss": 3.1279547214508057,
241
- "eval_new_wer": 0.9238095238095239,
242
- "eval_old_wer": 1.0,
243
- "eval_runtime": 8.6197,
244
- "eval_samples_per_second": 24.363,
245
- "eval_steps_per_second": 3.132,
246
- "step": 59780
247
- },
248
- {
249
- "epoch": 15.0,
250
- "learning_rate": 4.930318169865895e-08,
251
- "loss": 2.8106,
252
- "step": 64050
253
- },
254
- {
255
- "epoch": 15.0,
256
- "eval_cer": 0.9593073593073593,
257
- "eval_loss": 3.131213426589966,
258
- "eval_new_wer": 0.9333333333333333,
259
- "eval_old_wer": 1.0,
260
- "eval_runtime": 8.5832,
261
- "eval_samples_per_second": 24.466,
262
- "eval_steps_per_second": 3.146,
263
- "step": 64050
264
  }
265
  ],
266
  "max_steps": 64050,
267
  "num_train_epochs": 15,
268
- "total_flos": 5.079400634281409e+19,
269
  "trial_name": null,
270
  "trial_params": null
271
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 4270,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9.825795424664739e-05,
13
+ "loss": 7.898,
14
  "step": 4270
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_cer": 0.18354978354978355,
19
+ "eval_loss": 0.892418622970581,
20
+ "eval_new_wer": 0.1,
21
+ "eval_old_wer": 0.4714285714285714,
22
+ "eval_runtime": 8.5895,
23
+ "eval_samples_per_second": 24.448,
24
+ "eval_steps_per_second": 3.143,
25
  "step": 4270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "max_steps": 64050,
29
  "num_train_epochs": 15,
30
+ "total_flos": 3.2353018871683277e+18,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad917b31fae4bcf0c67ae1beb47de0804b8ba68209481d92e5d1d35cad1bd196
3
  size 3451
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad40ee222896fd4a776c83ffc757e3dbaa79c87f1bf52a69de9306ee72fdb26
3
  size 3451
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fdd83bce0be94c1d45bb977937e9084dac2cb76e797eac4044daa3dc8639b22
3
  size 377646433
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c1450349389d08ad66823b39334b010395a34ccee54ae96db36aa7efca4b3e3
3
  size 377646433
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad917b31fae4bcf0c67ae1beb47de0804b8ba68209481d92e5d1d35cad1bd196
3
  size 3451
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad40ee222896fd4a776c83ffc757e3dbaa79c87f1bf52a69de9306ee72fdb26
3
  size 3451