leecho committed on
Commit
e412287
1 Parent(s): e36bb34

commit from fleek

Browse files
config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.0,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "Wav2Vec2ForCTC"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "conv_bias": true,
11
+ "conv_dim": [
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512
19
+ ],
20
+ "conv_kernel": [
21
+ 10,
22
+ 3,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 2,
27
+ 2
28
+ ],
29
+ "conv_stride": [
30
+ 5,
31
+ 2,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2
37
+ ],
38
+ "ctc_loss_reduction": "mean",
39
+ "ctc_zero_infinity": false,
40
+ "do_stable_layer_norm": true,
41
+ "eos_token_id": 2,
42
+ "feat_extract_activation": "gelu",
43
+ "feat_extract_dropout": 0.0,
44
+ "feat_extract_norm": "layer",
45
+ "feat_proj_dropout": 0.0,
46
+ "final_dropout": 0.0,
47
+ "gradient_checkpointing": true,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 1024,
51
+ "initializer_range": 0.02,
52
+ "intermediate_size": 4096,
53
+ "layer_norm_eps": 1e-05,
54
+ "layerdrop": 0.1,
55
+ "mask_channel_length": 10,
56
+ "mask_channel_min_space": 1,
57
+ "mask_channel_other": 0.0,
58
+ "mask_channel_prob": 0.0,
59
+ "mask_channel_selection": "static",
60
+ "mask_feature_length": 10,
61
+ "mask_feature_prob": 0.0,
62
+ "mask_time_length": 10,
63
+ "mask_time_min_space": 1,
64
+ "mask_time_other": 0.0,
65
+ "mask_time_prob": 0.05,
66
+ "mask_time_selection": "static",
67
+ "model_type": "wav2vec2",
68
+ "num_attention_heads": 16,
69
+ "num_conv_pos_embedding_groups": 16,
70
+ "num_conv_pos_embeddings": 128,
71
+ "num_feat_extract_layers": 7,
72
+ "num_hidden_layers": 24,
73
+ "pad_token_id": 49,
74
+ "transformers_version": "4.4.0",
75
+ "vocab_size": 50
76
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:843ef1c076d692fbab55c293fa2d7b7abb30a66da9ba17af87118705b66a1b5a
3
+ size 2490487175
preprocessor_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_size": 1,
4
+ "padding_side": "right",
5
+ "padding_value": 0.0,
6
+ "return_attention_mask": false,
7
+ "sampling_rate": 16000
8
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cff7c083c2d21af1ae70f91a3628e930e121f213c8db1dd9208aaca3458734f
3
+ size 1262138839
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bb8b86bc4e378631af9b45c4f21eb4a7919ab5a185958820da51aa1f8703a89
3
+ size 623
trainer_state.json ADDED
@@ -0,0 +1,628 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 27.272727272727273,
5
+ "global_step": 3600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.3,
12
+ "learning_rate": 2.3999999999999997e-05,
13
+ "loss": 17.8657,
14
+ "step": 40
15
+ },
16
+ {
17
+ "epoch": 0.61,
18
+ "learning_rate": 4.7999999999999994e-05,
19
+ "loss": 12.104,
20
+ "step": 80
21
+ },
22
+ {
23
+ "epoch": 0.91,
24
+ "learning_rate": 7.199999999999999e-05,
25
+ "loss": 4.7542,
26
+ "step": 120
27
+ },
28
+ {
29
+ "epoch": 1.21,
30
+ "learning_rate": 9.599999999999999e-05,
31
+ "loss": 3.5251,
32
+ "step": 160
33
+ },
34
+ {
35
+ "epoch": 1.52,
36
+ "learning_rate": 0.00011999999999999999,
37
+ "loss": 3.2968,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 1.82,
42
+ "learning_rate": 0.00014399999999999998,
43
+ "loss": 3.1885,
44
+ "step": 240
45
+ },
46
+ {
47
+ "epoch": 2.12,
48
+ "learning_rate": 0.000168,
49
+ "loss": 3.173,
50
+ "step": 280
51
+ },
52
+ {
53
+ "epoch": 2.42,
54
+ "learning_rate": 0.00019199999999999998,
55
+ "loss": 3.1139,
56
+ "step": 320
57
+ },
58
+ {
59
+ "epoch": 2.73,
60
+ "learning_rate": 0.00021599999999999996,
61
+ "loss": 3.103,
62
+ "step": 360
63
+ },
64
+ {
65
+ "epoch": 3.03,
66
+ "learning_rate": 0.00023999999999999998,
67
+ "loss": 3.0862,
68
+ "step": 400
69
+ },
70
+ {
71
+ "epoch": 3.03,
72
+ "eval_loss": 3.177677869796753,
73
+ "eval_runtime": 53.6086,
74
+ "eval_samples_per_second": 22.403,
75
+ "eval_wer": 1.0,
76
+ "step": 400
77
+ },
78
+ {
79
+ "epoch": 3.33,
80
+ "learning_rate": 0.00026399999999999997,
81
+ "loss": 3.0227,
82
+ "step": 440
83
+ },
84
+ {
85
+ "epoch": 3.64,
86
+ "learning_rate": 0.00028799999999999995,
87
+ "loss": 2.6934,
88
+ "step": 480
89
+ },
90
+ {
91
+ "epoch": 3.94,
92
+ "learning_rate": 0.0002982658959537572,
93
+ "loss": 2.1587,
94
+ "step": 520
95
+ },
96
+ {
97
+ "epoch": 4.24,
98
+ "learning_rate": 0.00029479768786127165,
99
+ "loss": 1.7134,
100
+ "step": 560
101
+ },
102
+ {
103
+ "epoch": 4.55,
104
+ "learning_rate": 0.0002913294797687861,
105
+ "loss": 1.4758,
106
+ "step": 600
107
+ },
108
+ {
109
+ "epoch": 4.85,
110
+ "learning_rate": 0.00028786127167630053,
111
+ "loss": 1.311,
112
+ "step": 640
113
+ },
114
+ {
115
+ "epoch": 5.15,
116
+ "learning_rate": 0.000284393063583815,
117
+ "loss": 1.1944,
118
+ "step": 680
119
+ },
120
+ {
121
+ "epoch": 5.45,
122
+ "learning_rate": 0.00028092485549132947,
123
+ "loss": 1.082,
124
+ "step": 720
125
+ },
126
+ {
127
+ "epoch": 5.76,
128
+ "learning_rate": 0.0002774566473988439,
129
+ "loss": 1.024,
130
+ "step": 760
131
+ },
132
+ {
133
+ "epoch": 6.06,
134
+ "learning_rate": 0.00027398843930635835,
135
+ "loss": 0.9718,
136
+ "step": 800
137
+ },
138
+ {
139
+ "epoch": 6.06,
140
+ "eval_loss": 1.0074659585952759,
141
+ "eval_runtime": 53.837,
142
+ "eval_samples_per_second": 22.308,
143
+ "eval_wer": 0.8955532574974147,
144
+ "step": 800
145
+ },
146
+ {
147
+ "epoch": 6.36,
148
+ "learning_rate": 0.0002705202312138728,
149
+ "loss": 0.8674,
150
+ "step": 840
151
+ },
152
+ {
153
+ "epoch": 6.67,
154
+ "learning_rate": 0.0002670520231213873,
155
+ "loss": 0.8369,
156
+ "step": 880
157
+ },
158
+ {
159
+ "epoch": 6.97,
160
+ "learning_rate": 0.0002635838150289017,
161
+ "loss": 0.8437,
162
+ "step": 920
163
+ },
164
+ {
165
+ "epoch": 7.27,
166
+ "learning_rate": 0.00026011560693641616,
167
+ "loss": 0.7578,
168
+ "step": 960
169
+ },
170
+ {
171
+ "epoch": 7.58,
172
+ "learning_rate": 0.00025664739884393063,
173
+ "loss": 0.7275,
174
+ "step": 1000
175
+ },
176
+ {
177
+ "epoch": 7.88,
178
+ "learning_rate": 0.00025317919075144504,
179
+ "loss": 0.7207,
180
+ "step": 1040
181
+ },
182
+ {
183
+ "epoch": 8.18,
184
+ "learning_rate": 0.0002497109826589595,
185
+ "loss": 0.6415,
186
+ "step": 1080
187
+ },
188
+ {
189
+ "epoch": 8.48,
190
+ "learning_rate": 0.000246242774566474,
191
+ "loss": 0.6446,
192
+ "step": 1120
193
+ },
194
+ {
195
+ "epoch": 8.79,
196
+ "learning_rate": 0.00024277456647398842,
197
+ "loss": 0.6243,
198
+ "step": 1160
199
+ },
200
+ {
201
+ "epoch": 9.09,
202
+ "learning_rate": 0.0002393063583815029,
203
+ "loss": 0.6085,
204
+ "step": 1200
205
+ },
206
+ {
207
+ "epoch": 9.09,
208
+ "eval_loss": 0.8931847214698792,
209
+ "eval_runtime": 53.9711,
210
+ "eval_samples_per_second": 22.253,
211
+ "eval_wer": 0.797015807357069,
212
+ "step": 1200
213
+ },
214
+ {
215
+ "epoch": 9.39,
216
+ "learning_rate": 0.00023583815028901733,
217
+ "loss": 0.5279,
218
+ "step": 1240
219
+ },
220
+ {
221
+ "epoch": 9.7,
222
+ "learning_rate": 0.00023236994219653174,
223
+ "loss": 0.5279,
224
+ "step": 1280
225
+ },
226
+ {
227
+ "epoch": 10.0,
228
+ "learning_rate": 0.0002289017341040462,
229
+ "loss": 0.5421,
230
+ "step": 1320
231
+ },
232
+ {
233
+ "epoch": 10.3,
234
+ "learning_rate": 0.00022543352601156065,
235
+ "loss": 0.4856,
236
+ "step": 1360
237
+ },
238
+ {
239
+ "epoch": 10.61,
240
+ "learning_rate": 0.00022196531791907512,
241
+ "loss": 0.5182,
242
+ "step": 1400
243
+ },
244
+ {
245
+ "epoch": 10.91,
246
+ "learning_rate": 0.00021849710982658956,
247
+ "loss": 0.4756,
248
+ "step": 1440
249
+ },
250
+ {
251
+ "epoch": 11.21,
252
+ "learning_rate": 0.00021502890173410403,
253
+ "loss": 0.479,
254
+ "step": 1480
255
+ },
256
+ {
257
+ "epoch": 11.52,
258
+ "learning_rate": 0.00021156069364161847,
259
+ "loss": 0.4419,
260
+ "step": 1520
261
+ },
262
+ {
263
+ "epoch": 11.82,
264
+ "learning_rate": 0.00020809248554913294,
265
+ "loss": 0.448,
266
+ "step": 1560
267
+ },
268
+ {
269
+ "epoch": 12.12,
270
+ "learning_rate": 0.00020462427745664738,
271
+ "loss": 0.4295,
272
+ "step": 1600
273
+ },
274
+ {
275
+ "epoch": 12.12,
276
+ "eval_loss": 0.9030922055244446,
277
+ "eval_runtime": 54.1136,
278
+ "eval_samples_per_second": 22.194,
279
+ "eval_wer": 0.7748559609986704,
280
+ "step": 1600
281
+ },
282
+ {
283
+ "epoch": 12.42,
284
+ "learning_rate": 0.00020115606936416184,
285
+ "loss": 0.3976,
286
+ "step": 1640
287
+ },
288
+ {
289
+ "epoch": 12.73,
290
+ "learning_rate": 0.00019768786127167629,
291
+ "loss": 0.3699,
292
+ "step": 1680
293
+ },
294
+ {
295
+ "epoch": 13.03,
296
+ "learning_rate": 0.00019421965317919073,
297
+ "loss": 0.4159,
298
+ "step": 1720
299
+ },
300
+ {
301
+ "epoch": 13.33,
302
+ "learning_rate": 0.0001907514450867052,
303
+ "loss": 0.3221,
304
+ "step": 1760
305
+ },
306
+ {
307
+ "epoch": 13.64,
308
+ "learning_rate": 0.00018728323699421963,
309
+ "loss": 0.3642,
310
+ "step": 1800
311
+ },
312
+ {
313
+ "epoch": 13.94,
314
+ "learning_rate": 0.0001838150289017341,
315
+ "loss": 0.3832,
316
+ "step": 1840
317
+ },
318
+ {
319
+ "epoch": 14.24,
320
+ "learning_rate": 0.00018034682080924854,
321
+ "loss": 0.3367,
322
+ "step": 1880
323
+ },
324
+ {
325
+ "epoch": 14.55,
326
+ "learning_rate": 0.000176878612716763,
327
+ "loss": 0.3152,
328
+ "step": 1920
329
+ },
330
+ {
331
+ "epoch": 14.85,
332
+ "learning_rate": 0.00017341040462427745,
333
+ "loss": 0.3246,
334
+ "step": 1960
335
+ },
336
+ {
337
+ "epoch": 15.15,
338
+ "learning_rate": 0.00016994219653179192,
339
+ "loss": 0.3012,
340
+ "step": 2000
341
+ },
342
+ {
343
+ "epoch": 15.15,
344
+ "eval_loss": 0.9842168688774109,
345
+ "eval_runtime": 54.3687,
346
+ "eval_samples_per_second": 22.09,
347
+ "eval_wer": 0.7636283055104152,
348
+ "step": 2000
349
+ },
350
+ {
351
+ "epoch": 15.45,
352
+ "learning_rate": 0.00016647398843930633,
353
+ "loss": 0.3051,
354
+ "step": 2040
355
+ },
356
+ {
357
+ "epoch": 15.76,
358
+ "learning_rate": 0.00016300578034682077,
359
+ "loss": 0.2966,
360
+ "step": 2080
361
+ },
362
+ {
363
+ "epoch": 16.06,
364
+ "learning_rate": 0.00015953757225433524,
365
+ "loss": 0.309,
366
+ "step": 2120
367
+ },
368
+ {
369
+ "epoch": 16.36,
370
+ "learning_rate": 0.00015606936416184968,
371
+ "loss": 0.2553,
372
+ "step": 2160
373
+ },
374
+ {
375
+ "epoch": 16.67,
376
+ "learning_rate": 0.00015260115606936415,
377
+ "loss": 0.279,
378
+ "step": 2200
379
+ },
380
+ {
381
+ "epoch": 16.97,
382
+ "learning_rate": 0.0001491329479768786,
383
+ "loss": 0.2962,
384
+ "step": 2240
385
+ },
386
+ {
387
+ "epoch": 17.27,
388
+ "learning_rate": 0.00014566473988439306,
389
+ "loss": 0.2829,
390
+ "step": 2280
391
+ },
392
+ {
393
+ "epoch": 17.58,
394
+ "learning_rate": 0.0001421965317919075,
395
+ "loss": 0.2428,
396
+ "step": 2320
397
+ },
398
+ {
399
+ "epoch": 17.88,
400
+ "learning_rate": 0.00013872832369942194,
401
+ "loss": 0.2434,
402
+ "step": 2360
403
+ },
404
+ {
405
+ "epoch": 18.18,
406
+ "learning_rate": 0.0001352601156069364,
407
+ "loss": 0.2513,
408
+ "step": 2400
409
+ },
410
+ {
411
+ "epoch": 18.18,
412
+ "eval_loss": 1.0579547882080078,
413
+ "eval_runtime": 84.4423,
414
+ "eval_samples_per_second": 14.223,
415
+ "eval_wer": 0.7682080070911508,
416
+ "step": 2400
417
+ },
418
+ {
419
+ "epoch": 18.48,
420
+ "learning_rate": 0.00013179190751445085,
421
+ "loss": 0.2331,
422
+ "step": 2440
423
+ },
424
+ {
425
+ "epoch": 18.79,
426
+ "learning_rate": 0.00012832369942196532,
427
+ "loss": 0.2275,
428
+ "step": 2480
429
+ },
430
+ {
431
+ "epoch": 19.09,
432
+ "learning_rate": 0.00012485549132947976,
433
+ "loss": 0.2275,
434
+ "step": 2520
435
+ },
436
+ {
437
+ "epoch": 19.39,
438
+ "learning_rate": 0.00012138728323699421,
439
+ "loss": 0.2174,
440
+ "step": 2560
441
+ },
442
+ {
443
+ "epoch": 19.7,
444
+ "learning_rate": 0.00011791907514450866,
445
+ "loss": 0.2202,
446
+ "step": 2600
447
+ },
448
+ {
449
+ "epoch": 20.0,
450
+ "learning_rate": 0.0001144508670520231,
451
+ "loss": 0.2291,
452
+ "step": 2640
453
+ },
454
+ {
455
+ "epoch": 20.3,
456
+ "learning_rate": 0.00011098265895953756,
457
+ "loss": 0.1909,
458
+ "step": 2680
459
+ },
460
+ {
461
+ "epoch": 20.61,
462
+ "learning_rate": 0.00010751445086705201,
463
+ "loss": 0.1897,
464
+ "step": 2720
465
+ },
466
+ {
467
+ "epoch": 20.91,
468
+ "learning_rate": 0.00010404624277456647,
469
+ "loss": 0.22,
470
+ "step": 2760
471
+ },
472
+ {
473
+ "epoch": 21.21,
474
+ "learning_rate": 0.00010057803468208092,
475
+ "loss": 0.194,
476
+ "step": 2800
477
+ },
478
+ {
479
+ "epoch": 21.21,
480
+ "eval_loss": 1.0953465700149536,
481
+ "eval_runtime": 79.9574,
482
+ "eval_samples_per_second": 15.02,
483
+ "eval_wer": 0.7524006500221598,
484
+ "step": 2800
485
+ },
486
+ {
487
+ "epoch": 21.52,
488
+ "learning_rate": 9.710982658959536e-05,
489
+ "loss": 0.1862,
490
+ "step": 2840
491
+ },
492
+ {
493
+ "epoch": 21.82,
494
+ "learning_rate": 9.364161849710982e-05,
495
+ "loss": 0.2012,
496
+ "step": 2880
497
+ },
498
+ {
499
+ "epoch": 22.12,
500
+ "learning_rate": 9.017341040462427e-05,
501
+ "loss": 0.1823,
502
+ "step": 2920
503
+ },
504
+ {
505
+ "epoch": 22.42,
506
+ "learning_rate": 8.670520231213873e-05,
507
+ "loss": 0.1885,
508
+ "step": 2960
509
+ },
510
+ {
511
+ "epoch": 22.73,
512
+ "learning_rate": 8.323699421965317e-05,
513
+ "loss": 0.1616,
514
+ "step": 3000
515
+ },
516
+ {
517
+ "epoch": 23.03,
518
+ "learning_rate": 7.976878612716762e-05,
519
+ "loss": 0.1893,
520
+ "step": 3040
521
+ },
522
+ {
523
+ "epoch": 23.33,
524
+ "learning_rate": 7.630057803468207e-05,
525
+ "loss": 0.1879,
526
+ "step": 3080
527
+ },
528
+ {
529
+ "epoch": 23.64,
530
+ "learning_rate": 7.283236994219653e-05,
531
+ "loss": 0.1759,
532
+ "step": 3120
533
+ },
534
+ {
535
+ "epoch": 23.94,
536
+ "learning_rate": 6.936416184971097e-05,
537
+ "loss": 0.1703,
538
+ "step": 3160
539
+ },
540
+ {
541
+ "epoch": 24.24,
542
+ "learning_rate": 6.589595375722542e-05,
543
+ "loss": 0.1679,
544
+ "step": 3200
545
+ },
546
+ {
547
+ "epoch": 24.24,
548
+ "eval_loss": 1.1073198318481445,
549
+ "eval_runtime": 79.6701,
550
+ "eval_samples_per_second": 15.075,
551
+ "eval_wer": 0.7289112128822574,
552
+ "step": 3200
553
+ },
554
+ {
555
+ "epoch": 24.55,
556
+ "learning_rate": 6.242774566473988e-05,
557
+ "loss": 0.1663,
558
+ "step": 3240
559
+ },
560
+ {
561
+ "epoch": 24.85,
562
+ "learning_rate": 5.895953757225433e-05,
563
+ "loss": 0.1726,
564
+ "step": 3280
565
+ },
566
+ {
567
+ "epoch": 25.15,
568
+ "learning_rate": 5.549132947976878e-05,
569
+ "loss": 0.1733,
570
+ "step": 3320
571
+ },
572
+ {
573
+ "epoch": 25.45,
574
+ "learning_rate": 5.2023121387283234e-05,
575
+ "loss": 0.1764,
576
+ "step": 3360
577
+ },
578
+ {
579
+ "epoch": 25.76,
580
+ "learning_rate": 4.855491329479768e-05,
581
+ "loss": 0.14,
582
+ "step": 3400
583
+ },
584
+ {
585
+ "epoch": 26.06,
586
+ "learning_rate": 4.5086705202312136e-05,
587
+ "loss": 0.164,
588
+ "step": 3440
589
+ },
590
+ {
591
+ "epoch": 26.36,
592
+ "learning_rate": 4.161849710982658e-05,
593
+ "loss": 0.1569,
594
+ "step": 3480
595
+ },
596
+ {
597
+ "epoch": 26.67,
598
+ "learning_rate": 3.815028901734104e-05,
599
+ "loss": 0.1674,
600
+ "step": 3520
601
+ },
602
+ {
603
+ "epoch": 26.97,
604
+ "learning_rate": 3.4682080924855485e-05,
605
+ "loss": 0.1635,
606
+ "step": 3560
607
+ },
608
+ {
609
+ "epoch": 27.27,
610
+ "learning_rate": 3.121387283236994e-05,
611
+ "loss": 0.1372,
612
+ "step": 3600
613
+ },
614
+ {
615
+ "epoch": 27.27,
616
+ "eval_loss": 1.149294376373291,
617
+ "eval_runtime": 54.3637,
618
+ "eval_samples_per_second": 22.092,
619
+ "eval_wer": 0.7225587235928498,
620
+ "step": 3600
621
+ }
622
+ ],
623
+ "max_steps": 3960,
624
+ "num_train_epochs": 30,
625
+ "total_flos": 1.107485248885501e+19,
626
+ "trial_name": null,
627
+ "trial_params": null
628
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e299b5eff76b2f0965206b7254308074518f08e05f5b99d0dc57f79e50d7bd4
3
+ size 2287