jimregan committed on
Commit
d568a81
1 Parent(s): 371d687

current best

Browse files
Files changed (7) hide show
  1. config.json +2 -2
  2. optimizer.pt +3 -0
  3. pytorch_model.bin +2 -2
  4. scheduler.pt +3 -0
  5. trainer_state.json +394 -0
  6. training_args.bin +1 -1
  7. vocab.json +1 -1
config.json CHANGED
@@ -70,7 +70,7 @@
70
  "num_conv_pos_embeddings": 128,
71
  "num_feat_extract_layers": 7,
72
  "num_hidden_layers": 24,
73
- "pad_token_id": 33,
74
  "transformers_version": "4.5.0.dev0",
75
- "vocab_size": 34
76
  }
70
  "num_conv_pos_embeddings": 128,
71
  "num_feat_extract_layers": 7,
72
  "num_hidden_layers": 24,
73
+ "pad_token_id": 35,
74
  "transformers_version": "4.5.0.dev0",
75
+ "vocab_size": 36
76
  }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0fadf6bcb1f30106e97b1487a9ce13a59d541a1de164dec82e44118301582a
3
+ size 2490372359
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce360fd780e088d740adec796f104e48f31161946276c10489e45ae3da26bd6b
3
- size 1262073239
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a2400272c2501581a3a85604d1498b688acb511964fe7c4d279807fe9f946f
3
+ size 1262081431
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:803dc2c7dbe95fa79644abcf677114107a5f278f5f4d0b7535a7d91cd4fff228
3
+ size 623
trainer_state.json ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 11.297071129707113,
5
+ "global_step": 10800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.42,
12
+ "learning_rate": 0.00023999999999999998,
13
+ "loss": 6.6124,
14
+ "step": 400
15
+ },
16
+ {
17
+ "epoch": 0.42,
18
+ "eval_loss": 3.268874168395996,
19
+ "eval_runtime": 47.7642,
20
+ "eval_samples_per_second": 10.594,
21
+ "eval_wer": 0.9499561018437226,
22
+ "step": 400
23
+ },
24
+ {
25
+ "epoch": 0.84,
26
+ "learning_rate": 0.0002924547283702213,
27
+ "loss": Infinity,
28
+ "step": 800
29
+ },
30
+ {
31
+ "epoch": 0.84,
32
+ "eval_loss": 1.5890493392944336,
33
+ "eval_runtime": 46.9733,
34
+ "eval_samples_per_second": 10.772,
35
+ "eval_wer": 0.9242025168276266,
36
+ "step": 800
37
+ },
38
+ {
39
+ "epoch": 1.26,
40
+ "learning_rate": 0.0002823943661971831,
41
+ "loss": NaN,
42
+ "step": 1200
43
+ },
44
+ {
45
+ "epoch": 1.26,
46
+ "eval_loss": 1.399864673614502,
47
+ "eval_runtime": 49.6014,
48
+ "eval_samples_per_second": 10.201,
49
+ "eval_wer": 0.8653789874158618,
50
+ "step": 1200
51
+ },
52
+ {
53
+ "epoch": 1.67,
54
+ "learning_rate": 0.00027233400402414484,
55
+ "loss": NaN,
56
+ "step": 1600
57
+ },
58
+ {
59
+ "epoch": 1.67,
60
+ "eval_loss": 1.1861594915390015,
61
+ "eval_runtime": 45.83,
62
+ "eval_samples_per_second": 11.041,
63
+ "eval_wer": 0.8627450980392157,
64
+ "step": 1600
65
+ },
66
+ {
67
+ "epoch": 2.09,
68
+ "learning_rate": 0.0002622736418511066,
69
+ "loss": NaN,
70
+ "step": 2000
71
+ },
72
+ {
73
+ "epoch": 2.09,
74
+ "eval_loss": 1.1273273229599,
75
+ "eval_runtime": 47.5676,
76
+ "eval_samples_per_second": 10.638,
77
+ "eval_wer": 0.8124085455077553,
78
+ "step": 2000
79
+ },
80
+ {
81
+ "epoch": 2.51,
82
+ "learning_rate": 0.0002522132796780684,
83
+ "loss": NaN,
84
+ "step": 2400
85
+ },
86
+ {
87
+ "epoch": 2.51,
88
+ "eval_loss": 1.0673868656158447,
89
+ "eval_runtime": 46.2685,
90
+ "eval_samples_per_second": 10.936,
91
+ "eval_wer": 0.8141644717588528,
92
+ "step": 2400
93
+ },
94
+ {
95
+ "epoch": 2.93,
96
+ "learning_rate": 0.00024215291750503016,
97
+ "loss": NaN,
98
+ "step": 2800
99
+ },
100
+ {
101
+ "epoch": 2.93,
102
+ "eval_loss": 1.012987732887268,
103
+ "eval_runtime": 46.0398,
104
+ "eval_samples_per_second": 10.991,
105
+ "eval_wer": 0.8112379280070237,
106
+ "step": 2800
107
+ },
108
+ {
109
+ "epoch": 3.35,
110
+ "learning_rate": 0.00023209255533199194,
111
+ "loss": NaN,
112
+ "step": 3200
113
+ },
114
+ {
115
+ "epoch": 3.35,
116
+ "eval_loss": 1.0372620820999146,
117
+ "eval_runtime": 47.1969,
118
+ "eval_samples_per_second": 10.721,
119
+ "eval_wer": 0.7860696517412935,
120
+ "step": 3200
121
+ },
122
+ {
123
+ "epoch": 3.77,
124
+ "learning_rate": 0.00022203219315895372,
125
+ "loss": NaN,
126
+ "step": 3600
127
+ },
128
+ {
129
+ "epoch": 3.77,
130
+ "eval_loss": 0.9511893391609192,
131
+ "eval_runtime": 46.2728,
132
+ "eval_samples_per_second": 10.935,
133
+ "eval_wer": 0.7960199004975125,
134
+ "step": 3600
135
+ },
136
+ {
137
+ "epoch": 4.18,
138
+ "learning_rate": 0.00021197183098591548,
139
+ "loss": NaN,
140
+ "step": 4000
141
+ },
142
+ {
143
+ "epoch": 4.18,
144
+ "eval_loss": 0.9626357555389404,
145
+ "eval_runtime": 46.2615,
146
+ "eval_samples_per_second": 10.938,
147
+ "eval_wer": 0.7573895229733685,
148
+ "step": 4000
149
+ },
150
+ {
151
+ "epoch": 4.6,
152
+ "learning_rate": 0.00020191146881287726,
153
+ "loss": NaN,
154
+ "step": 4400
155
+ },
156
+ {
157
+ "epoch": 4.6,
158
+ "eval_loss": 0.9149179458618164,
159
+ "eval_runtime": 47.2032,
160
+ "eval_samples_per_second": 10.72,
161
+ "eval_wer": 0.755340942347088,
162
+ "step": 4400
163
+ },
164
+ {
165
+ "epoch": 5.02,
166
+ "learning_rate": 0.000191851106639839,
167
+ "loss": NaN,
168
+ "step": 4800
169
+ },
170
+ {
171
+ "epoch": 5.02,
172
+ "eval_loss": 0.945462703704834,
173
+ "eval_runtime": 47.6471,
174
+ "eval_samples_per_second": 10.62,
175
+ "eval_wer": 0.7515364354697103,
176
+ "step": 4800
177
+ },
178
+ {
179
+ "epoch": 5.44,
180
+ "learning_rate": 0.0001817907444668008,
181
+ "loss": NaN,
182
+ "step": 5200
183
+ },
184
+ {
185
+ "epoch": 5.44,
186
+ "eval_loss": 0.9152739644050598,
187
+ "eval_runtime": 48.1688,
188
+ "eval_samples_per_second": 10.505,
189
+ "eval_wer": 0.746268656716418,
190
+ "step": 5200
191
+ },
192
+ {
193
+ "epoch": 5.86,
194
+ "learning_rate": 0.00017173038229376258,
195
+ "loss": NaN,
196
+ "step": 5600
197
+ },
198
+ {
199
+ "epoch": 5.86,
200
+ "eval_loss": 0.9364785552024841,
201
+ "eval_runtime": 47.6477,
202
+ "eval_samples_per_second": 10.62,
203
+ "eval_wer": 0.7260755048287972,
204
+ "step": 5600
205
+ },
206
+ {
207
+ "epoch": 6.28,
208
+ "learning_rate": 0.00016167002012072433,
209
+ "loss": NaN,
210
+ "step": 6000
211
+ },
212
+ {
213
+ "epoch": 6.28,
214
+ "eval_loss": 0.9098660349845886,
215
+ "eval_runtime": 46.4104,
216
+ "eval_samples_per_second": 10.903,
217
+ "eval_wer": 0.7380743342112964,
218
+ "step": 6000
219
+ },
220
+ {
221
+ "epoch": 6.69,
222
+ "learning_rate": 0.0001516096579476861,
223
+ "loss": NaN,
224
+ "step": 6400
225
+ },
226
+ {
227
+ "epoch": 6.69,
228
+ "eval_loss": 0.9048272371292114,
229
+ "eval_runtime": 46.2425,
230
+ "eval_samples_per_second": 10.942,
231
+ "eval_wer": 0.7205150717003219,
232
+ "step": 6400
233
+ },
234
+ {
235
+ "epoch": 7.11,
236
+ "learning_rate": 0.00014154929577464787,
237
+ "loss": NaN,
238
+ "step": 6800
239
+ },
240
+ {
241
+ "epoch": 7.11,
242
+ "eval_loss": 0.8889923691749573,
243
+ "eval_runtime": 46.6074,
244
+ "eval_samples_per_second": 10.857,
245
+ "eval_wer": 0.7172958735733099,
246
+ "step": 6800
247
+ },
248
+ {
249
+ "epoch": 7.53,
250
+ "learning_rate": 0.00013148893360160965,
251
+ "loss": NaN,
252
+ "step": 7200
253
+ },
254
+ {
255
+ "epoch": 7.53,
256
+ "eval_loss": 0.870766818523407,
257
+ "eval_runtime": 47.1858,
258
+ "eval_samples_per_second": 10.724,
259
+ "eval_wer": 0.7091015510681885,
260
+ "step": 7200
261
+ },
262
+ {
263
+ "epoch": 7.95,
264
+ "learning_rate": 0.00012142857142857142,
265
+ "loss": NaN,
266
+ "step": 7600
267
+ },
268
+ {
269
+ "epoch": 7.95,
270
+ "eval_loss": 0.8733641505241394,
271
+ "eval_runtime": 47.6751,
272
+ "eval_samples_per_second": 10.614,
273
+ "eval_wer": 0.6971027216856892,
274
+ "step": 7600
275
+ },
276
+ {
277
+ "epoch": 8.37,
278
+ "learning_rate": 0.00011136820925553318,
279
+ "loss": NaN,
280
+ "step": 8000
281
+ },
282
+ {
283
+ "epoch": 8.37,
284
+ "eval_loss": 0.88031005859375,
285
+ "eval_runtime": 47.4892,
286
+ "eval_samples_per_second": 10.655,
287
+ "eval_wer": 0.6947614866842259,
288
+ "step": 8000
289
+ },
290
+ {
291
+ "epoch": 8.79,
292
+ "learning_rate": 0.00010130784708249495,
293
+ "loss": NaN,
294
+ "step": 8400
295
+ },
296
+ {
297
+ "epoch": 8.79,
298
+ "eval_loss": 0.8849018216133118,
299
+ "eval_runtime": 47.5503,
300
+ "eval_samples_per_second": 10.641,
301
+ "eval_wer": 0.6930055604331284,
302
+ "step": 8400
303
+ },
304
+ {
305
+ "epoch": 9.21,
306
+ "learning_rate": 9.124748490945673e-05,
307
+ "loss": NaN,
308
+ "step": 8800
309
+ },
310
+ {
311
+ "epoch": 9.21,
312
+ "eval_loss": 0.8564967513084412,
313
+ "eval_runtime": 46.9348,
314
+ "eval_samples_per_second": 10.781,
315
+ "eval_wer": 0.6903716710564823,
316
+ "step": 8800
317
+ },
318
+ {
319
+ "epoch": 9.62,
320
+ "learning_rate": 8.11871227364185e-05,
321
+ "loss": NaN,
322
+ "step": 9200
323
+ },
324
+ {
325
+ "epoch": 9.62,
326
+ "eval_loss": 0.875277042388916,
327
+ "eval_runtime": 46.126,
328
+ "eval_samples_per_second": 10.97,
329
+ "eval_wer": 0.6871524729294703,
330
+ "step": 9200
331
+ },
332
+ {
333
+ "epoch": 10.04,
334
+ "learning_rate": 7.112676056338028e-05,
335
+ "loss": NaN,
336
+ "step": 9600
337
+ },
338
+ {
339
+ "epoch": 10.04,
340
+ "eval_loss": 0.836927056312561,
341
+ "eval_runtime": 47.5639,
342
+ "eval_samples_per_second": 10.638,
343
+ "eval_wer": 0.6877377816798361,
344
+ "step": 9600
345
+ },
346
+ {
347
+ "epoch": 10.46,
348
+ "learning_rate": 6.106639839034204e-05,
349
+ "loss": NaN,
350
+ "step": 10000
351
+ },
352
+ {
353
+ "epoch": 10.46,
354
+ "eval_loss": 0.8403338193893433,
355
+ "eval_runtime": 47.7824,
356
+ "eval_samples_per_second": 10.59,
357
+ "eval_wer": 0.6798361135498976,
358
+ "step": 10000
359
+ },
360
+ {
361
+ "epoch": 10.88,
362
+ "learning_rate": 5.100603621730382e-05,
363
+ "loss": NaN,
364
+ "step": 10400
365
+ },
366
+ {
367
+ "epoch": 10.88,
368
+ "eval_loss": 0.8528462648391724,
369
+ "eval_runtime": 47.7751,
370
+ "eval_samples_per_second": 10.591,
371
+ "eval_wer": 0.6865671641791045,
372
+ "step": 10400
373
+ },
374
+ {
375
+ "epoch": 11.3,
376
+ "learning_rate": 4.094567404426559e-05,
377
+ "loss": NaN,
378
+ "step": 10800
379
+ },
380
+ {
381
+ "epoch": 11.3,
382
+ "eval_loss": 0.8435601592063904,
383
+ "eval_runtime": 47.6913,
384
+ "eval_samples_per_second": 10.61,
385
+ "eval_wer": 0.6777875329236172,
386
+ "step": 10800
387
+ }
388
+ ],
389
+ "max_steps": 12428,
390
+ "num_train_epochs": 13,
391
+ "total_flos": 2.5666394845611536e+19,
392
+ "trial_name": null,
393
+ "trial_params": null
394
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e153bfab921c0ecf2614bd33d36bf75c3a81ef0c6d9f75e345bc6f4fced7f4b
3
  size 2351
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be9395ddedfdd2b3371d6502ca09eab66f617ba9df9ccd12e2714cb71162112c
3
  size 2351
vocab.json CHANGED
@@ -1 +1 @@
1
- {"e": 0, "o": 1, "á": 2, "c": 3, "p": 4, "i": 5, "m": 6, "a": 7, "-": 8, "n": 9, "x": 10, "b": 11, "l": 12, "t": 13, "u": 14, "k": 15, "ó": 16, "v": 17, "g": 19, "é": 20, "h": 21, "r": 22, "í": 23, "d": 24, "y": 25, "f": 26, "ú": 27, "'": 28, "w": 29, "s": 30, "j": 31, "|": 18, "[UNK]": 32, "[PAD]": 33}
1
+ {"a": 0, "á": 1, "b": 2, "c": 3, "d": 4, "e": 5, "é": 6, "f": 7, "g": 8, "h": 9, "i": 10, "í": 11, "j": 12, "k": 13, "l": 14, "m": 15, "n": 16, "o": 17, "ó": 18, "p": 19, "q": 20, "r": 21, "s": 22, "t": 23, "u": 24, "ú": 25, "v": 26, "w": 27, "x": 28, "y": 29, "z": 30, "'": 31, "-": 32, "|": 33, "[UNK]": 34, "[PAD]": 35}