Alok committed on
Commit 29518b8
1 Parent(s): f3459cb

“baseline-0.1”

Files changed (7)
  1. config.json +3 -3
  2. optimizer.pt +2 -2
  3. pytorch_model.bin +2 -2
  4. scheduler.pt +1 -1
  5. trainer_state.json +54 -284
  6. training_args.bin +2 -2
  7. vocab.json +1 -1
config.json CHANGED
@@ -36,7 +36,7 @@
  2
  ],
  "ctc_loss_reduction": "mean",
- "ctc_zero_infinity": false,
+ "ctc_zero_infinity": true,
  "do_stable_layer_norm": true,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
@@ -70,7 +70,7 @@
  "num_conv_pos_embeddings": 128,
  "num_feat_extract_layers": 7,
  "num_hidden_layers": 24,
- "pad_token_id": 40,
+ "pad_token_id": 38,
  "transformers_version": "4.5.0.dev0",
- "vocab_size": 41
+ "vocab_size": 39
  }
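
The config.json change turns on ctc_zero_infinity (the previous trainer_state.json below logged loss values of Infinity/NaN) and aligns pad_token_id and vocab_size with the new 39-entry vocab.json. As a hedged sketch only, not something recorded in this commit, such overrides are typically passed when the CTC model is instantiated; the base checkpoint name here is an assumption:

from transformers import Wav2Vec2ForCTC

# Sketch: the keyword values mirror the new config.json; the checkpoint id is assumed, not from this commit.
model = Wav2Vec2ForCTC.from_pretrained(
    "facebook/wav2vec2-large-xlsr-53",   # assumed base model
    ctc_loss_reduction="mean",
    ctc_zero_infinity=True,   # zero out infinite CTC losses instead of propagating NaN
    pad_token_id=38,          # index of "[PAD]" in the new vocab.json
    vocab_size=39,            # number of entries in the new vocab.json
)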
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ce45012be135497e4b16ab8654d1a24b97bb14be98b5fabf07fdcff635dcf3e0
- size 1711
+ oid sha256:1d31ec004aaf1d538f6e31e243606338c02541fb9ebbd8484b070da2cc3e584d
+ size 2490396935
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a7772a1884e576f8ce8b03059b788e2f7a734edad5a45f3676945b1b37aba5f
- size 1262101912
+ oid sha256:81b052db94c38d86377adc7920c03bba67a2472ad264f2dcb1a53e25cff59a16
+ size 1262093719
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:42d008c9c215de3fb87e964d070febd87668726621c0db21bca9ed9eda04b74d
+ oid sha256:64543182043f9cdb3527195be935ec8f62e7dc4e7d6bad306616415bb49fc302
  size 623
trainer_state.json CHANGED
@@ -1,316 +1,86 @@
  {
- "best_metric": 1.0,
- "best_model_checkpoint": "../wav2vec2-large-xlsr-53-sw/checkpoint-154",
- "epoch": 2.9967637540453076,
- "global_step": 462,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 6.492706645056726,
+ "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.06,
- "learning_rate": 0.00015,
- "loss": Infinity,
- "step": 10
- },
- {
- "epoch": 0.13,
- "learning_rate": 0.0003,
- "loss": NaN,
- "step": 20
- },
- {
- "epoch": 0.19,
- "learning_rate": 0.000296,
- "loss": NaN,
- "step": 30
- },
- {
- "epoch": 0.26,
- "learning_rate": 0.000292,
- "loss": NaN,
- "step": 40
- },
- {
- "epoch": 0.32,
- "learning_rate": 0.00028799999999999995,
- "loss": NaN,
- "step": 50
- },
- {
- "epoch": 0.39,
- "learning_rate": 0.00028399999999999996,
- "loss": NaN,
- "step": 60
- },
- {
- "epoch": 0.45,
- "learning_rate": 0.00028,
- "loss": NaN,
- "step": 70
- },
- {
- "epoch": 0.52,
- "learning_rate": 0.000276,
- "loss": NaN,
- "step": 80
- },
- {
- "epoch": 0.58,
- "learning_rate": 0.00027199999999999994,
- "loss": NaN,
- "step": 90
- },
- {
- "epoch": 0.65,
- "learning_rate": 0.00026799999999999995,
- "loss": NaN,
- "step": 100
- },
- {
- "epoch": 0.71,
- "learning_rate": 0.00026399999999999997,
- "loss": NaN,
- "step": 110
- },
- {
- "epoch": 0.78,
- "learning_rate": 0.00026,
- "loss": NaN,
- "step": 120
- },
- {
- "epoch": 0.84,
- "learning_rate": 0.000256,
- "loss": NaN,
- "step": 130
- },
- {
- "epoch": 0.91,
- "learning_rate": 0.00025199999999999995,
- "loss": NaN,
- "step": 140
- },
- {
- "epoch": 0.97,
- "learning_rate": 0.00024799999999999996,
- "loss": NaN,
- "step": 150
- },
- {
- "epoch": 1.0,
- "eval_loss": Infinity,
- "eval_runtime": 358.7961,
- "eval_samples_per_second": 5.549,
- "eval_wer": 1.0,
- "step": 154
- },
- {
- "epoch": 1.04,
- "learning_rate": 0.000244,
- "loss": NaN,
- "step": 160
- },
- {
- "epoch": 1.1,
+ "epoch": 1.3,
  "learning_rate": 0.00023999999999999998,
- "loss": NaN,
- "step": 170
- },
- {
- "epoch": 1.17,
- "learning_rate": 0.00023599999999999996,
- "loss": NaN,
- "step": 180
- },
- {
- "epoch": 1.23,
- "learning_rate": 0.00023199999999999997,
- "loss": NaN,
- "step": 190
+ "loss": 3.5498,
+ "step": 400
  },
  {
  "epoch": 1.3,
- "learning_rate": 0.00022799999999999999,
- "loss": NaN,
- "step": 200
- },
- {
- "epoch": 1.36,
- "learning_rate": 0.000224,
- "loss": NaN,
- "step": 210
- },
- {
- "epoch": 1.43,
- "learning_rate": 0.00021999999999999995,
- "loss": NaN,
- "step": 220
- },
- {
- "epoch": 1.49,
- "learning_rate": 0.00021599999999999996,
- "loss": NaN,
- "step": 230
- },
- {
- "epoch": 1.56,
- "learning_rate": 0.00021199999999999998,
- "loss": NaN,
- "step": 240
- },
- {
- "epoch": 1.62,
- "learning_rate": 0.000208,
- "loss": NaN,
- "step": 250
- },
- {
- "epoch": 1.69,
- "learning_rate": 0.000204,
- "loss": NaN,
- "step": 260
- },
- {
- "epoch": 1.75,
- "learning_rate": 0.00019999999999999998,
- "loss": NaN,
- "step": 270
- },
- {
- "epoch": 1.82,
- "learning_rate": 0.00019599999999999997,
- "loss": NaN,
- "step": 280
- },
- {
- "epoch": 1.88,
- "learning_rate": 0.00019199999999999998,
- "loss": NaN,
- "step": 290
- },
- {
- "epoch": 1.94,
- "learning_rate": 0.000188,
- "loss": NaN,
- "step": 300
- },
- {
- "epoch": 2.0,
- "eval_loss": Infinity,
- "eval_runtime": 374.8395,
- "eval_samples_per_second": 5.312,
- "eval_wer": 1.0,
- "step": 308
- },
- {
- "epoch": 2.01,
- "learning_rate": 0.00018399999999999997,
- "loss": NaN,
- "step": 310
- },
- {
- "epoch": 2.08,
- "learning_rate": 0.00017999999999999998,
- "loss": NaN,
- "step": 320
- },
- {
- "epoch": 2.14,
- "learning_rate": 0.000176,
- "loss": NaN,
- "step": 330
- },
- {
- "epoch": 2.21,
- "learning_rate": 0.000172,
- "loss": NaN,
- "step": 340
- },
- {
- "epoch": 2.27,
- "learning_rate": 0.000168,
- "loss": NaN,
- "step": 350
- },
- {
- "epoch": 2.34,
- "learning_rate": 0.00016399999999999997,
- "loss": NaN,
- "step": 360
- },
- {
- "epoch": 2.4,
- "learning_rate": 0.00015999999999999999,
- "loss": NaN,
- "step": 370
- },
- {
- "epoch": 2.47,
- "learning_rate": 0.000156,
- "loss": NaN,
- "step": 380
- },
- {
- "epoch": 2.53,
- "learning_rate": 0.000152,
- "loss": NaN,
- "step": 390
+ "eval_loss": 0.7558379173278809,
+ "eval_runtime": 196.0447,
+ "eval_samples_per_second": 10.156,
+ "eval_wer": 0.7275141242937853,
+ "step": 400
  },
  {
  "epoch": 2.6,
- "learning_rate": 0.000148,
- "loss": NaN,
- "step": 400
+ "learning_rate": 0.000297029702970297,
+ "loss": 0.735,
+ "step": 800
  },
  {
- "epoch": 2.66,
- "learning_rate": 0.00014399999999999998,
- "loss": NaN,
- "step": 410
+ "epoch": 2.6,
+ "eval_loss": 0.4254470467567444,
+ "eval_runtime": 196.081,
+ "eval_samples_per_second": 10.154,
+ "eval_wer": 0.521412429378531,
+ "step": 800
  },
  {
- "epoch": 2.72,
- "learning_rate": 0.00014,
- "loss": NaN,
- "step": 420
+ "epoch": 3.89,
+ "learning_rate": 0.00029306930693069307,
+ "loss": 0.502,
+ "step": 1200
  },
  {
- "epoch": 2.79,
- "learning_rate": 0.00013599999999999997,
- "loss": NaN,
- "step": 430
+ "epoch": 3.89,
+ "eval_loss": 0.369031697511673,
+ "eval_runtime": 193.173,
+ "eval_samples_per_second": 10.307,
+ "eval_wer": 0.46146892655367233,
+ "step": 1200
  },
  {
- "epoch": 2.85,
- "learning_rate": 0.00013199999999999998,
- "loss": NaN,
- "step": 440
+ "epoch": 5.19,
+ "learning_rate": 0.00028910891089108906,
+ "loss": 0.4002,
+ "step": 1600
  },
  {
- "epoch": 2.92,
- "learning_rate": 0.000128,
- "loss": NaN,
- "step": 450
+ "epoch": 5.19,
+ "eval_loss": 0.34798651933670044,
+ "eval_runtime": 194.1431,
+ "eval_samples_per_second": 10.255,
+ "eval_wer": 0.4266666666666667,
+ "step": 1600
  },
  {
- "epoch": 2.98,
- "learning_rate": 0.00012399999999999998,
- "loss": NaN,
- "step": 460
+ "epoch": 6.49,
+ "learning_rate": 0.0002851485148514851,
+ "loss": 0.3301,
+ "step": 2000
  },
  {
- "epoch": 3.0,
- "eval_loss": Infinity,
- "eval_runtime": 334.0794,
- "eval_samples_per_second": 5.96,
- "eval_wer": 1.0,
- "step": 462
+ "epoch": 6.49,
+ "eval_loss": 0.34466618299484253,
+ "eval_runtime": 194.5915,
+ "eval_samples_per_second": 10.232,
+ "eval_wer": 0.41836158192090395,
+ "step": 2000
  }
  ],
- "max_steps": 770,
- "num_train_epochs": 5,
- "total_flos": 1.7250623020466376e+18,
+ "max_steps": 30800,
+ "num_train_epochs": 100,
+ "total_flos": 6.866081544696079e+18,
  "trial_name": null,
  "trial_params": null
  }
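
The new trainer_state.json log is consistent with a linear learning-rate schedule with warmup: a peak of 3e-4, roughly 500 warmup steps, and linear decay to zero at max_steps=30800. This is an inference from the logged values, not something stated in the commit; a small sketch that reproduces the logged rates:

# Sketch assuming peak_lr=3e-4, warmup_steps=500, linear decay to max_steps=30800
# (all inferred from the log above, not recorded in this commit).
def lr_at(step, peak_lr=3e-4, warmup_steps=500, max_steps=30800):
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    return peak_lr * (max_steps - step) / (max_steps - warmup_steps)

for step in (400, 800, 1200, 1600, 2000):
    print(step, lr_at(step))  # agrees with the learning_rate values logged above (up to float formatting)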
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5fcccbfe1b211984cf07a9455560cac8fbc9c69a011acf7c165d7e5331248598
- size 2351
+ oid sha256:3b4b6b9216cddae0e2711129cec936f431b1dad296c1b852446679260b47926d
+ size 2287
vocab.json CHANGED
@@ -1 +1 @@
- {">": 0, "y": 1, "z": 2, "o": 3, "w": 4, "f": 5, "i": 6, "t": 7, "h": 8, "b": 9, "r": 10, "j": 11, "x": 13, "l": 14, "k": 15, "p": 16, "c": 17, "v": 18, "<": 19, "a": 20, "d": 21, "m": 22, "n": 23, "u": 24, "g": 25, "s": 26, "q": 27, "e": 28, "|": 12, "[UNK]": 29, "[PAD]": 30}
+ {"q": 0, "s": 1, "3": 2, "'": 3, "n": 4, "y": 5, "c": 7, "_": 8, "k": 9, "5": 10, "l": 11, "o": 12, "0": 13, "j": 14, "6": 15, "d": 16, "1": 17, "9": 18, "u": 19, "g": 20, "b": 21, "p": 22, "m": 23, "v": 24, "2": 25, "f": 26, "i": 27, "t": 28, "r": 29, "w": 30, "z": 31, "a": 32, "x": 33, "h": 34, "7": 35, "e": 36, "|": 6, "[UNK]": 37, "[PAD]": 38}