chrisvinsen commited on
Commit
30a8e1c
1 Parent(s): 47aa759
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 60.0,
3
+ "total_flos": 2.7374813261347353e+19,
4
+ "train_loss": 0.0985529641951284,
5
+ "train_runtime": 13747.1961,
6
+ "train_samples": 3965,
7
+ "train_samples_per_second": 17.305,
8
+ "train_steps_per_second": 0.541
9
+ }
config.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.055,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.094,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": true,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.04,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "hidden_act": "gelu",
56
+ "hidden_dropout": 0.047,
57
+ "hidden_size": 1024,
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 4096,
60
+ "layer_norm_eps": 1e-05,
61
+ "layerdrop": 0.041,
62
+ "mask_channel_length": 10,
63
+ "mask_channel_min_space": 1,
64
+ "mask_channel_other": 0.0,
65
+ "mask_channel_prob": 0.0,
66
+ "mask_channel_selection": "static",
67
+ "mask_feature_length": 10,
68
+ "mask_feature_min_masks": 0,
69
+ "mask_feature_prob": 0.0,
70
+ "mask_time_length": 10,
71
+ "mask_time_min_masks": 2,
72
+ "mask_time_min_space": 1,
73
+ "mask_time_other": 0.0,
74
+ "mask_time_prob": 0.4,
75
+ "mask_time_selection": "static",
76
+ "model_type": "wav2vec2",
77
+ "num_adapter_layers": 3,
78
+ "num_attention_heads": 16,
79
+ "num_codevector_groups": 2,
80
+ "num_codevectors_per_group": 320,
81
+ "num_conv_pos_embedding_groups": 16,
82
+ "num_conv_pos_embeddings": 128,
83
+ "num_feat_extract_layers": 7,
84
+ "num_hidden_layers": 24,
85
+ "num_negatives": 100,
86
+ "output_hidden_size": 1024,
87
+ "pad_token_id": 28,
88
+ "proj_codevector_dim": 768,
89
+ "tdnn_dilation": [
90
+ 1,
91
+ 2,
92
+ 3,
93
+ 1,
94
+ 1
95
+ ],
96
+ "tdnn_dim": [
97
+ 512,
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 1500
102
+ ],
103
+ "tdnn_kernel": [
104
+ 5,
105
+ 3,
106
+ 3,
107
+ 1,
108
+ 1
109
+ ],
110
+ "torch_dtype": "float32",
111
+ "transformers_version": "4.18.0",
112
+ "use_weighted_layer_sum": false,
113
+ "vocab_size": 29,
114
+ "xvector_output_dim": 512
115
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 60.0,
3
+ "total_flos": 2.7374813261347353e+19,
4
+ "train_loss": 0.0985529641951284,
5
+ "train_runtime": 13747.1961,
6
+ "train_samples": 3965,
7
+ "train_samples_per_second": 17.305,
8
+ "train_steps_per_second": 0.541
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,1135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 60.0,
5
+ "global_step": 7440,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.81,
12
+ "learning_rate": 1.2903225806451613e-05,
13
+ "loss": 13.4856,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.81,
18
+ "eval_loss": 12.348929405212402,
19
+ "eval_runtime": 128.1609,
20
+ "eval_samples_per_second": 14.388,
21
+ "eval_steps_per_second": 1.802,
22
+ "eval_wer": 1.0,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 1.61,
27
+ "learning_rate": 2.6344086021505376e-05,
28
+ "loss": 4.9759,
29
+ "step": 200
30
+ },
31
+ {
32
+ "epoch": 1.61,
33
+ "eval_loss": 3.1773388385772705,
34
+ "eval_runtime": 128.7202,
35
+ "eval_samples_per_second": 14.326,
36
+ "eval_steps_per_second": 1.795,
37
+ "eval_wer": 1.0,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 2.42,
42
+ "learning_rate": 3.978494623655914e-05,
43
+ "loss": 2.9905,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 2.42,
48
+ "eval_loss": 2.9219164848327637,
49
+ "eval_runtime": 129.5213,
50
+ "eval_samples_per_second": 14.237,
51
+ "eval_steps_per_second": 1.783,
52
+ "eval_wer": 1.0,
53
+ "step": 300
54
+ },
55
+ {
56
+ "epoch": 3.23,
57
+ "learning_rate": 5.32258064516129e-05,
58
+ "loss": 2.8774,
59
+ "step": 400
60
+ },
61
+ {
62
+ "epoch": 3.23,
63
+ "eval_loss": 2.8899402618408203,
64
+ "eval_runtime": 129.0058,
65
+ "eval_samples_per_second": 14.294,
66
+ "eval_steps_per_second": 1.791,
67
+ "eval_wer": 1.0,
68
+ "step": 400
69
+ },
70
+ {
71
+ "epoch": 4.03,
72
+ "learning_rate": 6.666666666666667e-05,
73
+ "loss": 2.832,
74
+ "step": 500
75
+ },
76
+ {
77
+ "epoch": 4.03,
78
+ "eval_loss": 2.787729501724243,
79
+ "eval_runtime": 129.1257,
80
+ "eval_samples_per_second": 14.281,
81
+ "eval_steps_per_second": 1.789,
82
+ "eval_wer": 1.0,
83
+ "step": 500
84
+ },
85
+ {
86
+ "epoch": 4.84,
87
+ "learning_rate": 8.010752688172043e-05,
88
+ "loss": 2.6921,
89
+ "step": 600
90
+ },
91
+ {
92
+ "epoch": 4.84,
93
+ "eval_loss": 2.3222157955169678,
94
+ "eval_runtime": 129.5689,
95
+ "eval_samples_per_second": 14.232,
96
+ "eval_steps_per_second": 1.783,
97
+ "eval_wer": 1.020324445273168,
98
+ "step": 600
99
+ },
100
+ {
101
+ "epoch": 5.65,
102
+ "learning_rate": 9.35483870967742e-05,
103
+ "loss": 1.7609,
104
+ "step": 700
105
+ },
106
+ {
107
+ "epoch": 5.65,
108
+ "eval_loss": 0.7123318910598755,
109
+ "eval_runtime": 129.8973,
110
+ "eval_samples_per_second": 14.196,
111
+ "eval_steps_per_second": 1.778,
112
+ "eval_wer": 0.7778295730001865,
113
+ "step": 700
114
+ },
115
+ {
116
+ "epoch": 6.45,
117
+ "learning_rate": 0.0001,
118
+ "loss": 1.0829,
119
+ "step": 800
120
+ },
121
+ {
122
+ "epoch": 6.45,
123
+ "eval_loss": 0.5219887495040894,
124
+ "eval_runtime": 130.0287,
125
+ "eval_samples_per_second": 14.181,
126
+ "eval_steps_per_second": 1.777,
127
+ "eval_wer": 0.6609173969793026,
128
+ "step": 800
129
+ },
130
+ {
131
+ "epoch": 7.26,
132
+ "learning_rate": 0.0001,
133
+ "loss": 0.8991,
134
+ "step": 900
135
+ },
136
+ {
137
+ "epoch": 7.26,
138
+ "eval_loss": 0.4353589415550232,
139
+ "eval_runtime": 129.1246,
140
+ "eval_samples_per_second": 14.281,
141
+ "eval_steps_per_second": 1.789,
142
+ "eval_wer": 0.5778482192802535,
143
+ "step": 900
144
+ },
145
+ {
146
+ "epoch": 8.06,
147
+ "learning_rate": 0.0001,
148
+ "loss": 0.7943,
149
+ "step": 1000
150
+ },
151
+ {
152
+ "epoch": 8.06,
153
+ "eval_loss": 0.409180223941803,
154
+ "eval_runtime": 129.6166,
155
+ "eval_samples_per_second": 14.227,
156
+ "eval_steps_per_second": 1.782,
157
+ "eval_wer": 0.5485735595748649,
158
+ "step": 1000
159
+ },
160
+ {
161
+ "epoch": 8.87,
162
+ "learning_rate": 0.0001,
163
+ "loss": 0.7319,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 8.87,
168
+ "eval_loss": 0.37231260538101196,
169
+ "eval_runtime": 129.5968,
170
+ "eval_samples_per_second": 14.229,
171
+ "eval_steps_per_second": 1.782,
172
+ "eval_wer": 0.5166884206600784,
173
+ "step": 1100
174
+ },
175
+ {
176
+ "epoch": 9.68,
177
+ "learning_rate": 0.0001,
178
+ "loss": 0.6875,
179
+ "step": 1200
180
+ },
181
+ {
182
+ "epoch": 9.68,
183
+ "eval_loss": 0.3546667993068695,
184
+ "eval_runtime": 129.8844,
185
+ "eval_samples_per_second": 14.197,
186
+ "eval_steps_per_second": 1.779,
187
+ "eval_wer": 0.5021443222077195,
188
+ "step": 1200
189
+ },
190
+ {
191
+ "epoch": 10.48,
192
+ "learning_rate": 0.0001,
193
+ "loss": 0.6538,
194
+ "step": 1300
195
+ },
196
+ {
197
+ "epoch": 10.48,
198
+ "eval_loss": 0.3382996916770935,
199
+ "eval_runtime": 130.4091,
200
+ "eval_samples_per_second": 14.14,
201
+ "eval_steps_per_second": 1.771,
202
+ "eval_wer": 0.5028901734104047,
203
+ "step": 1300
204
+ },
205
+ {
206
+ "epoch": 11.29,
207
+ "learning_rate": 0.0001,
208
+ "loss": 0.6385,
209
+ "step": 1400
210
+ },
211
+ {
212
+ "epoch": 11.29,
213
+ "eval_loss": 0.33781182765960693,
214
+ "eval_runtime": 129.3767,
215
+ "eval_samples_per_second": 14.253,
216
+ "eval_steps_per_second": 1.785,
217
+ "eval_wer": 0.4781838523214619,
218
+ "step": 1400
219
+ },
220
+ {
221
+ "epoch": 12.1,
222
+ "learning_rate": 0.0001,
223
+ "loss": 0.6031,
224
+ "step": 1500
225
+ },
226
+ {
227
+ "epoch": 12.1,
228
+ "eval_loss": 0.33670100569725037,
229
+ "eval_runtime": 130.1665,
230
+ "eval_samples_per_second": 14.166,
231
+ "eval_steps_per_second": 1.775,
232
+ "eval_wer": 0.47445459630803655,
233
+ "step": 1500
234
+ },
235
+ {
236
+ "epoch": 12.9,
237
+ "learning_rate": 0.0001,
238
+ "loss": 0.5778,
239
+ "step": 1600
240
+ },
241
+ {
242
+ "epoch": 12.9,
243
+ "eval_loss": 0.3219880163669586,
244
+ "eval_runtime": 130.1154,
245
+ "eval_samples_per_second": 14.172,
246
+ "eval_steps_per_second": 1.775,
247
+ "eval_wer": 0.4559015476412456,
248
+ "step": 1600
249
+ },
250
+ {
251
+ "epoch": 13.71,
252
+ "learning_rate": 0.0001,
253
+ "loss": 0.5531,
254
+ "step": 1700
255
+ },
256
+ {
257
+ "epoch": 13.71,
258
+ "eval_loss": 0.3436262309551239,
259
+ "eval_runtime": 130.5395,
260
+ "eval_samples_per_second": 14.126,
261
+ "eval_steps_per_second": 1.77,
262
+ "eval_wer": 0.4655043818758158,
263
+ "step": 1700
264
+ },
265
+ {
266
+ "epoch": 14.52,
267
+ "learning_rate": 0.0001,
268
+ "loss": 0.5506,
269
+ "step": 1800
270
+ },
271
+ {
272
+ "epoch": 14.52,
273
+ "eval_loss": 0.32741010189056396,
274
+ "eval_runtime": 130.5027,
275
+ "eval_samples_per_second": 14.13,
276
+ "eval_steps_per_second": 1.77,
277
+ "eval_wer": 0.4504008950214432,
278
+ "step": 1800
279
+ },
280
+ {
281
+ "epoch": 15.32,
282
+ "learning_rate": 0.0001,
283
+ "loss": 0.5186,
284
+ "step": 1900
285
+ },
286
+ {
287
+ "epoch": 15.32,
288
+ "eval_loss": 0.3106687664985657,
289
+ "eval_runtime": 131.9389,
290
+ "eval_samples_per_second": 13.976,
291
+ "eval_steps_per_second": 1.751,
292
+ "eval_wer": 0.44368823419727765,
293
+ "step": 1900
294
+ },
295
+ {
296
+ "epoch": 16.13,
297
+ "learning_rate": 0.0001,
298
+ "loss": 0.5065,
299
+ "step": 2000
300
+ },
301
+ {
302
+ "epoch": 16.13,
303
+ "eval_loss": 0.3165331482887268,
304
+ "eval_runtime": 130.835,
305
+ "eval_samples_per_second": 14.094,
306
+ "eval_steps_per_second": 1.766,
307
+ "eval_wer": 0.44005220958418795,
308
+ "step": 2000
309
+ },
310
+ {
311
+ "epoch": 16.94,
312
+ "learning_rate": 0.0001,
313
+ "loss": 0.5016,
314
+ "step": 2100
315
+ },
316
+ {
317
+ "epoch": 16.94,
318
+ "eval_loss": 0.3141399919986725,
319
+ "eval_runtime": 131.1616,
320
+ "eval_samples_per_second": 14.059,
321
+ "eval_steps_per_second": 1.761,
322
+ "eval_wer": 0.440798060786873,
323
+ "step": 2100
324
+ },
325
+ {
326
+ "epoch": 17.74,
327
+ "learning_rate": 0.0001,
328
+ "loss": 0.4878,
329
+ "step": 2200
330
+ },
331
+ {
332
+ "epoch": 17.74,
333
+ "eval_loss": 0.323138564825058,
334
+ "eval_runtime": 129.6671,
335
+ "eval_samples_per_second": 14.221,
336
+ "eval_steps_per_second": 1.781,
337
+ "eval_wer": 0.4408912921872086,
338
+ "step": 2200
339
+ },
340
+ {
341
+ "epoch": 18.55,
342
+ "learning_rate": 0.0001,
343
+ "loss": 0.4701,
344
+ "step": 2300
345
+ },
346
+ {
347
+ "epoch": 18.55,
348
+ "eval_loss": 0.314998984336853,
349
+ "eval_runtime": 130.2103,
350
+ "eval_samples_per_second": 14.162,
351
+ "eval_steps_per_second": 1.774,
352
+ "eval_wer": 0.441823606190565,
353
+ "step": 2300
354
+ },
355
+ {
356
+ "epoch": 19.35,
357
+ "learning_rate": 0.0001,
358
+ "loss": 0.4643,
359
+ "step": 2400
360
+ },
361
+ {
362
+ "epoch": 19.35,
363
+ "eval_loss": 0.32002875208854675,
364
+ "eval_runtime": 129.7219,
365
+ "eval_samples_per_second": 14.215,
366
+ "eval_steps_per_second": 1.781,
367
+ "eval_wer": 0.43641618497109824,
368
+ "step": 2400
369
+ },
370
+ {
371
+ "epoch": 20.16,
372
+ "learning_rate": 0.0001,
373
+ "loss": 0.4623,
374
+ "step": 2500
375
+ },
376
+ {
377
+ "epoch": 20.16,
378
+ "eval_loss": 0.3018810749053955,
379
+ "eval_runtime": 127.3741,
380
+ "eval_samples_per_second": 14.477,
381
+ "eval_steps_per_second": 1.814,
382
+ "eval_wer": 0.42187208651873953,
383
+ "step": 2500
384
+ },
385
+ {
386
+ "epoch": 20.97,
387
+ "learning_rate": 0.0001,
388
+ "loss": 0.4497,
389
+ "step": 2600
390
+ },
391
+ {
392
+ "epoch": 20.97,
393
+ "eval_loss": 0.3108297288417816,
394
+ "eval_runtime": 127.9529,
395
+ "eval_samples_per_second": 14.412,
396
+ "eval_steps_per_second": 1.805,
397
+ "eval_wer": 0.43054260674995337,
398
+ "step": 2600
399
+ },
400
+ {
401
+ "epoch": 21.77,
402
+ "learning_rate": 0.0001,
403
+ "loss": 0.4439,
404
+ "step": 2700
405
+ },
406
+ {
407
+ "epoch": 21.77,
408
+ "eval_loss": 0.31016433238983154,
409
+ "eval_runtime": 127.0701,
410
+ "eval_samples_per_second": 14.512,
411
+ "eval_steps_per_second": 1.818,
412
+ "eval_wer": 0.42429610292746595,
413
+ "step": 2700
414
+ },
415
+ {
416
+ "epoch": 22.58,
417
+ "learning_rate": 0.0001,
418
+ "loss": 0.4289,
419
+ "step": 2800
420
+ },
421
+ {
422
+ "epoch": 22.58,
423
+ "eval_loss": 0.29791948199272156,
424
+ "eval_runtime": 128.4647,
425
+ "eval_samples_per_second": 14.354,
426
+ "eval_steps_per_second": 1.798,
427
+ "eval_wer": 0.41935483870967744,
428
+ "step": 2800
429
+ },
430
+ {
431
+ "epoch": 23.39,
432
+ "learning_rate": 0.0001,
433
+ "loss": 0.423,
434
+ "step": 2900
435
+ },
436
+ {
437
+ "epoch": 23.39,
438
+ "eval_loss": 0.3108172118663788,
439
+ "eval_runtime": 128.2048,
440
+ "eval_samples_per_second": 14.383,
441
+ "eval_steps_per_second": 1.802,
442
+ "eval_wer": 0.41832929330598545,
443
+ "step": 2900
444
+ },
445
+ {
446
+ "epoch": 24.19,
447
+ "learning_rate": 0.0001,
448
+ "loss": 0.4214,
449
+ "step": 3000
450
+ },
451
+ {
452
+ "epoch": 24.19,
453
+ "eval_loss": 0.3556348383426666,
454
+ "eval_runtime": 128.3694,
455
+ "eval_samples_per_second": 14.365,
456
+ "eval_steps_per_second": 1.799,
457
+ "eval_wer": 0.42000745851202687,
458
+ "step": 3000
459
+ },
460
+ {
461
+ "epoch": 25.0,
462
+ "learning_rate": 0.0001,
463
+ "loss": 0.4139,
464
+ "step": 3100
465
+ },
466
+ {
467
+ "epoch": 25.0,
468
+ "eval_loss": 0.3197577893733978,
469
+ "eval_runtime": 128.1255,
470
+ "eval_samples_per_second": 14.392,
471
+ "eval_steps_per_second": 1.803,
472
+ "eval_wer": 0.42243147492075334,
473
+ "step": 3100
474
+ },
475
+ {
476
+ "epoch": 25.81,
477
+ "learning_rate": 0.0001,
478
+ "loss": 0.4015,
479
+ "step": 3200
480
+ },
481
+ {
482
+ "epoch": 25.81,
483
+ "eval_loss": 0.29826977849006653,
484
+ "eval_runtime": 128.9516,
485
+ "eval_samples_per_second": 14.3,
486
+ "eval_steps_per_second": 1.791,
487
+ "eval_wer": 0.4151594256945739,
488
+ "step": 3200
489
+ },
490
+ {
491
+ "epoch": 26.61,
492
+ "learning_rate": 0.0001,
493
+ "loss": 0.4044,
494
+ "step": 3300
495
+ },
496
+ {
497
+ "epoch": 26.61,
498
+ "eval_loss": 0.28930172324180603,
499
+ "eval_runtime": 128.281,
500
+ "eval_samples_per_second": 14.375,
501
+ "eval_steps_per_second": 1.801,
502
+ "eval_wer": 0.4065821368636957,
503
+ "step": 3300
504
+ },
505
+ {
506
+ "epoch": 27.42,
507
+ "learning_rate": 0.0001,
508
+ "loss": 0.4013,
509
+ "step": 3400
510
+ },
511
+ {
512
+ "epoch": 27.42,
513
+ "eval_loss": 0.32470086216926575,
514
+ "eval_runtime": 128.6984,
515
+ "eval_samples_per_second": 14.328,
516
+ "eval_steps_per_second": 1.795,
517
+ "eval_wer": 0.4160917396979303,
518
+ "step": 3400
519
+ },
520
+ {
521
+ "epoch": 28.23,
522
+ "learning_rate": 0.0001,
523
+ "loss": 0.3959,
524
+ "step": 3500
525
+ },
526
+ {
527
+ "epoch": 28.23,
528
+ "eval_loss": 0.32055872678756714,
529
+ "eval_runtime": 128.9252,
530
+ "eval_samples_per_second": 14.303,
531
+ "eval_steps_per_second": 1.792,
532
+ "eval_wer": 0.4156255826962521,
533
+ "step": 3500
534
+ },
535
+ {
536
+ "epoch": 29.03,
537
+ "learning_rate": 0.0001,
538
+ "loss": 0.3923,
539
+ "step": 3600
540
+ },
541
+ {
542
+ "epoch": 29.03,
543
+ "eval_loss": 0.32669639587402344,
544
+ "eval_runtime": 129.4219,
545
+ "eval_samples_per_second": 14.248,
546
+ "eval_steps_per_second": 1.785,
547
+ "eval_wer": 0.41432034309155324,
548
+ "step": 3600
549
+ },
550
+ {
551
+ "epoch": 29.84,
552
+ "learning_rate": 0.0001,
553
+ "loss": 0.3789,
554
+ "step": 3700
555
+ },
556
+ {
557
+ "epoch": 29.84,
558
+ "eval_loss": 0.31051748991012573,
559
+ "eval_runtime": 129.0113,
560
+ "eval_samples_per_second": 14.293,
561
+ "eval_steps_per_second": 1.791,
562
+ "eval_wer": 0.4084467648704084,
563
+ "step": 3700
564
+ },
565
+ {
566
+ "epoch": 30.65,
567
+ "learning_rate": 9.795698924731184e-05,
568
+ "loss": 0.371,
569
+ "step": 3800
570
+ },
571
+ {
572
+ "epoch": 30.65,
573
+ "eval_loss": 0.31601375341415405,
574
+ "eval_runtime": 130.3347,
575
+ "eval_samples_per_second": 14.148,
576
+ "eval_steps_per_second": 1.772,
577
+ "eval_wer": 0.41031139287712104,
578
+ "step": 3800
579
+ },
580
+ {
581
+ "epoch": 31.45,
582
+ "learning_rate": 9.526881720430108e-05,
583
+ "loss": 0.3813,
584
+ "step": 3900
585
+ },
586
+ {
587
+ "epoch": 31.45,
588
+ "eval_loss": 0.32537344098091125,
589
+ "eval_runtime": 129.4643,
590
+ "eval_samples_per_second": 14.243,
591
+ "eval_steps_per_second": 1.784,
592
+ "eval_wer": 0.4071415252657095,
593
+ "step": 3900
594
+ },
595
+ {
596
+ "epoch": 32.26,
597
+ "learning_rate": 9.258064516129033e-05,
598
+ "loss": 0.3575,
599
+ "step": 4000
600
+ },
601
+ {
602
+ "epoch": 32.26,
603
+ "eval_loss": 0.35630208253860474,
604
+ "eval_runtime": 129.9641,
605
+ "eval_samples_per_second": 14.189,
606
+ "eval_steps_per_second": 1.777,
607
+ "eval_wer": 0.40928584747342905,
608
+ "step": 4000
609
+ },
610
+ {
611
+ "epoch": 33.06,
612
+ "learning_rate": 8.989247311827958e-05,
613
+ "loss": 0.3642,
614
+ "step": 4100
615
+ },
616
+ {
617
+ "epoch": 33.06,
618
+ "eval_loss": 0.29261836409568787,
619
+ "eval_runtime": 129.3284,
620
+ "eval_samples_per_second": 14.258,
621
+ "eval_steps_per_second": 1.786,
622
+ "eval_wer": 0.4012679470445646,
623
+ "step": 4100
624
+ },
625
+ {
626
+ "epoch": 33.87,
627
+ "learning_rate": 8.720430107526883e-05,
628
+ "loss": 0.3494,
629
+ "step": 4200
630
+ },
631
+ {
632
+ "epoch": 33.87,
633
+ "eval_loss": 0.3040522634983063,
634
+ "eval_runtime": 129.9364,
635
+ "eval_samples_per_second": 14.192,
636
+ "eval_steps_per_second": 1.778,
637
+ "eval_wer": 0.39912362483684505,
638
+ "step": 4200
639
+ },
640
+ {
641
+ "epoch": 34.68,
642
+ "learning_rate": 8.451612903225808e-05,
643
+ "loss": 0.3446,
644
+ "step": 4300
645
+ },
646
+ {
647
+ "epoch": 34.68,
648
+ "eval_loss": 0.30367347598075867,
649
+ "eval_runtime": 129.7515,
650
+ "eval_samples_per_second": 14.212,
651
+ "eval_steps_per_second": 1.78,
652
+ "eval_wer": 0.3964199142271117,
653
+ "step": 4300
654
+ },
655
+ {
656
+ "epoch": 35.48,
657
+ "learning_rate": 8.182795698924732e-05,
658
+ "loss": 0.3458,
659
+ "step": 4400
660
+ },
661
+ {
662
+ "epoch": 35.48,
663
+ "eval_loss": 0.3123793303966522,
664
+ "eval_runtime": 129.7166,
665
+ "eval_samples_per_second": 14.216,
666
+ "eval_steps_per_second": 1.781,
667
+ "eval_wer": 0.3993100876375163,
668
+ "step": 4400
669
+ },
670
+ {
671
+ "epoch": 36.29,
672
+ "learning_rate": 7.913978494623657e-05,
673
+ "loss": 0.3418,
674
+ "step": 4500
675
+ },
676
+ {
677
+ "epoch": 36.29,
678
+ "eval_loss": 0.33982059359550476,
679
+ "eval_runtime": 130.2572,
680
+ "eval_samples_per_second": 14.157,
681
+ "eval_steps_per_second": 1.773,
682
+ "eval_wer": 0.40052209584187953,
683
+ "step": 4500
684
+ },
685
+ {
686
+ "epoch": 37.1,
687
+ "learning_rate": 7.647849462365591e-05,
688
+ "loss": 0.3424,
689
+ "step": 4600
690
+ },
691
+ {
692
+ "epoch": 37.1,
693
+ "eval_loss": 0.3201097548007965,
694
+ "eval_runtime": 130.7802,
695
+ "eval_samples_per_second": 14.1,
696
+ "eval_steps_per_second": 1.766,
697
+ "eval_wer": 0.4044378146559761,
698
+ "step": 4600
699
+ },
700
+ {
701
+ "epoch": 37.9,
702
+ "learning_rate": 7.379032258064516e-05,
703
+ "loss": 0.3256,
704
+ "step": 4700
705
+ },
706
+ {
707
+ "epoch": 37.9,
708
+ "eval_loss": 0.31234973669052124,
709
+ "eval_runtime": 129.793,
710
+ "eval_samples_per_second": 14.207,
711
+ "eval_steps_per_second": 1.78,
712
+ "eval_wer": 0.39912362483684505,
713
+ "step": 4700
714
+ },
715
+ {
716
+ "epoch": 38.71,
717
+ "learning_rate": 7.110215053763441e-05,
718
+ "loss": 0.3183,
719
+ "step": 4800
720
+ },
721
+ {
722
+ "epoch": 38.71,
723
+ "eval_loss": 0.3197832405567169,
724
+ "eval_runtime": 131.4284,
725
+ "eval_samples_per_second": 14.03,
726
+ "eval_steps_per_second": 1.758,
727
+ "eval_wer": 0.39707253402946113,
728
+ "step": 4800
729
+ },
730
+ {
731
+ "epoch": 39.52,
732
+ "learning_rate": 6.841397849462365e-05,
733
+ "loss": 0.3189,
734
+ "step": 4900
735
+ },
736
+ {
737
+ "epoch": 39.52,
738
+ "eval_loss": 0.32954466342926025,
739
+ "eval_runtime": 135.8464,
740
+ "eval_samples_per_second": 13.574,
741
+ "eval_steps_per_second": 1.7,
742
+ "eval_wer": 0.4004288644415439,
743
+ "step": 4900
744
+ },
745
+ {
746
+ "epoch": 40.32,
747
+ "learning_rate": 6.57258064516129e-05,
748
+ "loss": 0.3227,
749
+ "step": 5000
750
+ },
751
+ {
752
+ "epoch": 40.32,
753
+ "eval_loss": 0.32881081104278564,
754
+ "eval_runtime": 135.8344,
755
+ "eval_samples_per_second": 13.575,
756
+ "eval_steps_per_second": 1.701,
757
+ "eval_wer": 0.39604698862576915,
758
+ "step": 5000
759
+ },
760
+ {
761
+ "epoch": 41.13,
762
+ "learning_rate": 6.303763440860215e-05,
763
+ "loss": 0.3007,
764
+ "step": 5100
765
+ },
766
+ {
767
+ "epoch": 41.13,
768
+ "eval_loss": 0.3239193856716156,
769
+ "eval_runtime": 131.4702,
770
+ "eval_samples_per_second": 14.026,
771
+ "eval_steps_per_second": 1.757,
772
+ "eval_wer": 0.39558083162409097,
773
+ "step": 5100
774
+ },
775
+ {
776
+ "epoch": 41.94,
777
+ "learning_rate": 6.0349462365591405e-05,
778
+ "loss": 0.3013,
779
+ "step": 5200
780
+ },
781
+ {
782
+ "epoch": 41.94,
783
+ "eval_loss": 0.32908666133880615,
784
+ "eval_runtime": 131.5819,
785
+ "eval_samples_per_second": 14.014,
786
+ "eval_steps_per_second": 1.756,
787
+ "eval_wer": 0.39464851762073466,
788
+ "step": 5200
789
+ },
790
+ {
791
+ "epoch": 42.74,
792
+ "learning_rate": 5.7661290322580655e-05,
793
+ "loss": 0.3067,
794
+ "step": 5300
795
+ },
796
+ {
797
+ "epoch": 42.74,
798
+ "eval_loss": 0.3361692726612091,
799
+ "eval_runtime": 131.6013,
800
+ "eval_samples_per_second": 14.012,
801
+ "eval_steps_per_second": 1.755,
802
+ "eval_wer": 0.3944620548200634,
803
+ "step": 5300
804
+ },
805
+ {
806
+ "epoch": 43.55,
807
+ "learning_rate": 5.497311827956989e-05,
808
+ "loss": 0.3047,
809
+ "step": 5400
810
+ },
811
+ {
812
+ "epoch": 43.55,
813
+ "eval_loss": 0.32479631900787354,
814
+ "eval_runtime": 131.6497,
815
+ "eval_samples_per_second": 14.007,
816
+ "eval_steps_per_second": 1.755,
817
+ "eval_wer": 0.39297035241469325,
818
+ "step": 5400
819
+ },
820
+ {
821
+ "epoch": 44.35,
822
+ "learning_rate": 5.228494623655914e-05,
823
+ "loss": 0.2939,
824
+ "step": 5500
825
+ },
826
+ {
827
+ "epoch": 44.35,
828
+ "eval_loss": 0.3141416013240814,
829
+ "eval_runtime": 131.4484,
830
+ "eval_samples_per_second": 14.028,
831
+ "eval_steps_per_second": 1.757,
832
+ "eval_wer": 0.392783889614022,
833
+ "step": 5500
834
+ },
835
+ {
836
+ "epoch": 45.16,
837
+ "learning_rate": 4.959677419354839e-05,
838
+ "loss": 0.2795,
839
+ "step": 5600
840
+ },
841
+ {
842
+ "epoch": 45.16,
843
+ "eval_loss": 0.3212459981441498,
844
+ "eval_runtime": 132.0759,
845
+ "eval_samples_per_second": 13.962,
846
+ "eval_steps_per_second": 1.749,
847
+ "eval_wer": 0.392783889614022,
848
+ "step": 5600
849
+ },
850
+ {
851
+ "epoch": 45.97,
852
+ "learning_rate": 4.690860215053764e-05,
853
+ "loss": 0.295,
854
+ "step": 5700
855
+ },
856
+ {
857
+ "epoch": 45.97,
858
+ "eval_loss": 0.33260008692741394,
859
+ "eval_runtime": 131.9753,
860
+ "eval_samples_per_second": 13.972,
861
+ "eval_steps_per_second": 1.75,
862
+ "eval_wer": 0.39091926160730933,
863
+ "step": 5700
864
+ },
865
+ {
866
+ "epoch": 46.77,
867
+ "learning_rate": 4.4220430107526885e-05,
868
+ "loss": 0.2819,
869
+ "step": 5800
870
+ },
871
+ {
872
+ "epoch": 46.77,
873
+ "eval_loss": 0.3328990936279297,
874
+ "eval_runtime": 132.6165,
875
+ "eval_samples_per_second": 13.905,
876
+ "eval_steps_per_second": 1.742,
877
+ "eval_wer": 0.39408912921872086,
878
+ "step": 5800
879
+ },
880
+ {
881
+ "epoch": 47.58,
882
+ "learning_rate": 4.1532258064516135e-05,
883
+ "loss": 0.2881,
884
+ "step": 5900
885
+ },
886
+ {
887
+ "epoch": 47.58,
888
+ "eval_loss": 0.32989582419395447,
889
+ "eval_runtime": 132.6745,
890
+ "eval_samples_per_second": 13.899,
891
+ "eval_steps_per_second": 1.741,
892
+ "eval_wer": 0.392783889614022,
893
+ "step": 5900
894
+ },
895
+ {
896
+ "epoch": 48.39,
897
+ "learning_rate": 3.884408602150538e-05,
898
+ "loss": 0.2826,
899
+ "step": 6000
900
+ },
901
+ {
902
+ "epoch": 48.39,
903
+ "eval_loss": 0.33995482325553894,
904
+ "eval_runtime": 132.1443,
905
+ "eval_samples_per_second": 13.954,
906
+ "eval_steps_per_second": 1.748,
907
+ "eval_wer": 0.3911057244079806,
908
+ "step": 6000
909
+ },
910
+ {
911
+ "epoch": 49.19,
912
+ "learning_rate": 3.615591397849463e-05,
913
+ "loss": 0.2741,
914
+ "step": 6100
915
+ },
916
+ {
917
+ "epoch": 49.19,
918
+ "eval_loss": 0.3233118951320648,
919
+ "eval_runtime": 132.6042,
920
+ "eval_samples_per_second": 13.906,
921
+ "eval_steps_per_second": 1.742,
922
+ "eval_wer": 0.38784262539623343,
923
+ "step": 6100
924
+ },
925
+ {
926
+ "epoch": 50.0,
927
+ "learning_rate": 3.346774193548387e-05,
928
+ "loss": 0.2621,
929
+ "step": 6200
930
+ },
931
+ {
932
+ "epoch": 50.0,
933
+ "eval_loss": 0.31878435611724854,
934
+ "eval_runtime": 133.6242,
935
+ "eval_samples_per_second": 13.8,
936
+ "eval_steps_per_second": 1.729,
937
+ "eval_wer": 0.38905463360059667,
938
+ "step": 6200
939
+ },
940
+ {
941
+ "epoch": 50.81,
942
+ "learning_rate": 3.077956989247312e-05,
943
+ "loss": 0.2743,
944
+ "step": 6300
945
+ },
946
+ {
947
+ "epoch": 50.81,
948
+ "eval_loss": 0.32761555910110474,
949
+ "eval_runtime": 133.7291,
950
+ "eval_samples_per_second": 13.789,
951
+ "eval_steps_per_second": 1.727,
952
+ "eval_wer": 0.3868170799925415,
953
+ "step": 6300
954
+ },
955
+ {
956
+ "epoch": 51.61,
957
+ "learning_rate": 2.8091397849462365e-05,
958
+ "loss": 0.272,
959
+ "step": 6400
960
+ },
961
+ {
962
+ "epoch": 51.61,
963
+ "eval_loss": 0.3271888196468353,
964
+ "eval_runtime": 133.5583,
965
+ "eval_samples_per_second": 13.807,
966
+ "eval_steps_per_second": 1.73,
967
+ "eval_wer": 0.38551184038784264,
968
+ "step": 6400
969
+ },
970
+ {
971
+ "epoch": 52.42,
972
+ "learning_rate": 2.5403225806451615e-05,
973
+ "loss": 0.2717,
974
+ "step": 6500
975
+ },
976
+ {
977
+ "epoch": 52.42,
978
+ "eval_loss": 0.32686877250671387,
979
+ "eval_runtime": 133.4336,
980
+ "eval_samples_per_second": 13.82,
981
+ "eval_steps_per_second": 1.731,
982
+ "eval_wer": 0.38299459257878055,
983
+ "step": 6500
984
+ },
985
+ {
986
+ "epoch": 53.23,
987
+ "learning_rate": 2.271505376344086e-05,
988
+ "loss": 0.261,
989
+ "step": 6600
990
+ },
991
+ {
992
+ "epoch": 53.23,
993
+ "eval_loss": 0.3300979435443878,
994
+ "eval_runtime": 133.7969,
995
+ "eval_samples_per_second": 13.782,
996
+ "eval_steps_per_second": 1.726,
997
+ "eval_wer": 0.3838336751818012,
998
+ "step": 6600
999
+ },
1000
+ {
1001
+ "epoch": 54.03,
1002
+ "learning_rate": 2.002688172043011e-05,
1003
+ "loss": 0.2499,
1004
+ "step": 6700
1005
+ },
1006
+ {
1007
+ "epoch": 54.03,
1008
+ "eval_loss": 0.3309214413166046,
1009
+ "eval_runtime": 133.9684,
1010
+ "eval_samples_per_second": 13.764,
1011
+ "eval_steps_per_second": 1.724,
1012
+ "eval_wer": 0.38560507178817827,
1013
+ "step": 6700
1014
+ },
1015
+ {
1016
+ "epoch": 54.84,
1017
+ "learning_rate": 1.733870967741936e-05,
1018
+ "loss": 0.2617,
1019
+ "step": 6800
1020
+ },
1021
+ {
1022
+ "epoch": 54.84,
1023
+ "eval_loss": 0.32907187938690186,
1024
+ "eval_runtime": 135.3114,
1025
+ "eval_samples_per_second": 13.628,
1026
+ "eval_steps_per_second": 1.707,
1027
+ "eval_wer": 0.3820622785754242,
1028
+ "step": 6800
1029
+ },
1030
+ {
1031
+ "epoch": 55.65,
1032
+ "learning_rate": 1.4650537634408603e-05,
1033
+ "loss": 0.2628,
1034
+ "step": 6900
1035
+ },
1036
+ {
1037
+ "epoch": 55.65,
1038
+ "eval_loss": 0.32681843638420105,
1039
+ "eval_runtime": 135.4461,
1040
+ "eval_samples_per_second": 13.614,
1041
+ "eval_steps_per_second": 1.705,
1042
+ "eval_wer": 0.38085027037106095,
1043
+ "step": 6900
1044
+ },
1045
+ {
1046
+ "epoch": 56.45,
1047
+ "learning_rate": 1.196236559139785e-05,
1048
+ "loss": 0.2528,
1049
+ "step": 7000
1050
+ },
1051
+ {
1052
+ "epoch": 56.45,
1053
+ "eval_loss": 0.32666248083114624,
1054
+ "eval_runtime": 134.5769,
1055
+ "eval_samples_per_second": 13.702,
1056
+ "eval_steps_per_second": 1.716,
1057
+ "eval_wer": 0.3807570389707253,
1058
+ "step": 7000
1059
+ },
1060
+ {
1061
+ "epoch": 57.26,
1062
+ "learning_rate": 9.274193548387097e-06,
1063
+ "loss": 0.2475,
1064
+ "step": 7100
1065
+ },
1066
+ {
1067
+ "epoch": 57.26,
1068
+ "eval_loss": 0.32469746470451355,
1069
+ "eval_runtime": 135.31,
1070
+ "eval_samples_per_second": 13.628,
1071
+ "eval_steps_per_second": 1.707,
1072
+ "eval_wer": 0.37963826216669777,
1073
+ "step": 7100
1074
+ },
1075
+ {
1076
+ "epoch": 58.06,
1077
+ "learning_rate": 6.586021505376344e-06,
1078
+ "loss": 0.2545,
1079
+ "step": 7200
1080
+ },
1081
+ {
1082
+ "epoch": 58.06,
1083
+ "eval_loss": 0.32185879349708557,
1084
+ "eval_runtime": 135.4299,
1085
+ "eval_samples_per_second": 13.616,
1086
+ "eval_steps_per_second": 1.706,
1087
+ "eval_wer": 0.3789856423643483,
1088
+ "step": 7200
1089
+ },
1090
+ {
1091
+ "epoch": 58.87,
1092
+ "learning_rate": 3.8978494623655915e-06,
1093
+ "loss": 0.2571,
1094
+ "step": 7300
1095
+ },
1096
+ {
1097
+ "epoch": 58.87,
1098
+ "eval_loss": 0.32077959179878235,
1099
+ "eval_runtime": 135.1873,
1100
+ "eval_samples_per_second": 13.64,
1101
+ "eval_steps_per_second": 1.709,
1102
+ "eval_wer": 0.37889241096401266,
1103
+ "step": 7300
1104
+ },
1105
+ {
1106
+ "epoch": 59.68,
1107
+ "learning_rate": 1.2096774193548388e-06,
1108
+ "loss": 0.2546,
1109
+ "step": 7400
1110
+ },
1111
+ {
1112
+ "epoch": 59.68,
1113
+ "eval_loss": 0.3218778669834137,
1114
+ "eval_runtime": 134.9556,
1115
+ "eval_samples_per_second": 13.664,
1116
+ "eval_steps_per_second": 1.712,
1117
+ "eval_wer": 0.37945179936602647,
1118
+ "step": 7400
1119
+ },
1120
+ {
1121
+ "epoch": 60.0,
1122
+ "step": 7440,
1123
+ "total_flos": 2.7374813261347353e+19,
1124
+ "train_loss": 0.0985529641951284,
1125
+ "train_runtime": 13747.1961,
1126
+ "train_samples_per_second": 17.305,
1127
+ "train_steps_per_second": 0.541
1128
+ }
1129
+ ],
1130
+ "max_steps": 7440,
1131
+ "num_train_epochs": 60,
1132
+ "total_flos": 2.7374813261347353e+19,
1133
+ "trial_name": null,
1134
+ "trial_params": null
1135
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b28d478aa55f881af82235351b3768624d58ffaaa46aa93e2d12c1abca3dd8
3
+ size 3183
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "k": 10, "l": 11, "m": 12, "n": 13, "o": 14, "p": 15, "q": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25, "|": 26, "[UNK]": 27, "[PAD]": 28}