bkh6722 committed on
Commit
d2a5192
•
1 Parent(s): c961ea3

End of training

Browse files
runs/{May17_14-29-05_6824c9adb81f → xlsr-d}/1652797798.814858/events.out.tfevents.1652797798.6824c9adb81f.82.1 RENAMED
File without changes
runs/{May17_14-29-05_6824c9adb81f → xlsr-d}/events.out.tfevents.1652797798.6824c9adb81f.82.0 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0c4f390e1d35a5f12fd9752c4be393ec9f4171835e5831427fe48685130c93
3
- size 11732
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9066491e94d2afced19e87b563703ad58e5b95a1e5edfb126bf0819f664c10aa
3
+ size 12086
xlsr-d/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
+ "activation_dropout": 0.0,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "Wav2Vec2ForCTC"
7
+ ],
8
+ "attention_dropout": 0.5,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "codevector_dim": 768,
12
+ "contrastive_logits_temperature": 0.1,
13
+ "conv_bias": true,
14
+ "conv_dim": [
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512
22
+ ],
23
+ "conv_kernel": [
24
+ 10,
25
+ 3,
26
+ 3,
27
+ 3,
28
+ 3,
29
+ 2,
30
+ 2
31
+ ],
32
+ "conv_stride": [
33
+ 5,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2,
38
+ 2,
39
+ 2
40
+ ],
41
+ "ctc_loss_reduction": "mean",
42
+ "ctc_zero_infinity": false,
43
+ "diversity_loss_weight": 0.1,
44
+ "do_stable_layer_norm": true,
45
+ "eos_token_id": 2,
46
+ "feat_extract_activation": "gelu",
47
+ "feat_extract_dropout": 0.0,
48
+ "feat_extract_norm": "layer",
49
+ "feat_proj_dropout": 0.0,
50
+ "feat_quantizer_dropout": 0.0,
51
+ "final_dropout": 0.0,
52
+ "gradient_checkpointing": false,
53
+ "hidden_act": "gelu",
54
+ "hidden_dropout": 0.0,
55
+ "hidden_size": 1024,
56
+ "initializer_range": 0.02,
57
+ "intermediate_size": 4096,
58
+ "layer_norm_eps": 1e-05,
59
+ "layerdrop": 0.0,
60
+ "mask_feature_length": 10,
61
+ "mask_feature_prob": 0.0,
62
+ "mask_time_length": 10,
63
+ "mask_time_prob": 0.05,
64
+ "model_type": "wav2vec2",
65
+ "num_attention_heads": 16,
66
+ "num_codevector_groups": 2,
67
+ "num_codevectors_per_group": 320,
68
+ "num_conv_pos_embedding_groups": 16,
69
+ "num_conv_pos_embeddings": 128,
70
+ "num_feat_extract_layers": 7,
71
+ "num_hidden_layers": 24,
72
+ "num_negatives": 100,
73
+ "pad_token_id": 24,
74
+ "proj_codevector_dim": 768,
75
+ "torch_dtype": "float32",
76
+ "transformers_version": "4.11.3",
77
+ "use_weighted_layer_sum": false,
78
+ "vocab_size": 32
79
+ }
xlsr-d/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cf049beb09c71bc0521d194eaa570f8bf11d25afae6f971c26c340ba7d0177f
3
+ size 2490321361
xlsr-d/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
xlsr-d/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:402105f42c09cbf3be400e8193446e1aee19765fc56c9a5557559595ff8745ec
3
+ size 1262054897
xlsr-d/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:425398804cd39c73e5a1ef09f27acf2dcf98549241abe4b7813162a346e1cfc5
3
+ size 14567
xlsr-d/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:496737325eded97b0e6ca504845428eeff5ca23b97b989d666af04447fcf45c5
3
+ size 559
xlsr-d/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8fd6b64173700554787bd96b3a97002f204277035a984524a8a564f4da1aa28
3
+ size 623
xlsr-d/trainer_state.json ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 799.8,
5
+ "global_step": 1600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 49.8,
12
+ "learning_rate": 5.6999999999999996e-05,
13
+ "loss": 29.4414,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 49.8,
18
+ "eval_loss": 5.776216983795166,
19
+ "eval_runtime": 22.9959,
20
+ "eval_samples_per_second": 2.348,
21
+ "eval_steps_per_second": 0.304,
22
+ "eval_wer": 1.0,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 99.8,
27
+ "learning_rate": 0.000117,
28
+ "loss": 4.7183,
29
+ "step": 200
30
+ },
31
+ {
32
+ "epoch": 99.8,
33
+ "eval_loss": 3.2387633323669434,
34
+ "eval_runtime": 1.6681,
35
+ "eval_samples_per_second": 32.373,
36
+ "eval_steps_per_second": 4.196,
37
+ "eval_wer": 1.0,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 149.8,
42
+ "learning_rate": 0.00017699999999999997,
43
+ "loss": 2.6296,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 149.8,
48
+ "eval_loss": 3.655775785446167,
49
+ "eval_runtime": 1.6764,
50
+ "eval_samples_per_second": 32.212,
51
+ "eval_steps_per_second": 4.176,
52
+ "eval_wer": 1.0,
53
+ "step": 300
54
+ },
55
+ {
56
+ "epoch": 199.8,
57
+ "learning_rate": 0.000237,
58
+ "loss": 0.9482,
59
+ "step": 400
60
+ },
61
+ {
62
+ "epoch": 199.8,
63
+ "eval_loss": 5.037266254425049,
64
+ "eval_runtime": 1.6945,
65
+ "eval_samples_per_second": 31.869,
66
+ "eval_steps_per_second": 4.131,
67
+ "eval_wer": 1.0144927536231885,
68
+ "step": 400
69
+ },
70
+ {
71
+ "epoch": 249.8,
72
+ "learning_rate": 0.00029699999999999996,
73
+ "loss": 0.3026,
74
+ "step": 500
75
+ },
76
+ {
77
+ "epoch": 249.8,
78
+ "eval_loss": 5.303652763366699,
79
+ "eval_runtime": 1.6899,
80
+ "eval_samples_per_second": 31.955,
81
+ "eval_steps_per_second": 4.142,
82
+ "eval_wer": 1.1014492753623188,
83
+ "step": 500
84
+ },
85
+ {
86
+ "epoch": 299.8,
87
+ "learning_rate": 0.0002740909090909091,
88
+ "loss": 0.1812,
89
+ "step": 600
90
+ },
91
+ {
92
+ "epoch": 299.8,
93
+ "eval_loss": 5.8963623046875,
94
+ "eval_runtime": 1.7036,
95
+ "eval_samples_per_second": 31.698,
96
+ "eval_steps_per_second": 4.109,
97
+ "eval_wer": 1.0579710144927537,
98
+ "step": 600
99
+ },
100
+ {
101
+ "epoch": 349.8,
102
+ "learning_rate": 0.0002468181818181818,
103
+ "loss": 0.119,
104
+ "step": 700
105
+ },
106
+ {
107
+ "epoch": 349.8,
108
+ "eval_loss": 7.217254638671875,
109
+ "eval_runtime": 1.6846,
110
+ "eval_samples_per_second": 32.055,
111
+ "eval_steps_per_second": 4.155,
112
+ "eval_wer": 1.1014492753623188,
113
+ "step": 700
114
+ },
115
+ {
116
+ "epoch": 399.8,
117
+ "learning_rate": 0.00021954545454545452,
118
+ "loss": 0.0971,
119
+ "step": 800
120
+ },
121
+ {
122
+ "epoch": 399.8,
123
+ "eval_loss": 6.781766891479492,
124
+ "eval_runtime": 1.6983,
125
+ "eval_samples_per_second": 31.796,
126
+ "eval_steps_per_second": 4.122,
127
+ "eval_wer": 1.0289855072463767,
128
+ "step": 800
129
+ },
130
+ {
131
+ "epoch": 449.8,
132
+ "learning_rate": 0.00019227272727272723,
133
+ "loss": 0.0581,
134
+ "step": 900
135
+ },
136
+ {
137
+ "epoch": 449.8,
138
+ "eval_loss": 6.925220489501953,
139
+ "eval_runtime": 1.679,
140
+ "eval_samples_per_second": 32.162,
141
+ "eval_steps_per_second": 4.169,
142
+ "eval_wer": 1.0579710144927537,
143
+ "step": 900
144
+ },
145
+ {
146
+ "epoch": 499.8,
147
+ "learning_rate": 0.000165,
148
+ "loss": 0.0482,
149
+ "step": 1000
150
+ },
151
+ {
152
+ "epoch": 499.8,
153
+ "eval_loss": 6.8015570640563965,
154
+ "eval_runtime": 1.6822,
155
+ "eval_samples_per_second": 32.1,
156
+ "eval_steps_per_second": 4.161,
157
+ "eval_wer": 1.0289855072463767,
158
+ "step": 1000
159
+ },
160
+ {
161
+ "epoch": 549.8,
162
+ "learning_rate": 0.0001377272727272727,
163
+ "loss": 0.0357,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 549.8,
168
+ "eval_loss": 7.164830684661865,
169
+ "eval_runtime": 1.6855,
170
+ "eval_samples_per_second": 32.038,
171
+ "eval_steps_per_second": 4.153,
172
+ "eval_wer": 1.0289855072463767,
173
+ "step": 1100
174
+ },
175
+ {
176
+ "epoch": 599.8,
177
+ "learning_rate": 0.00011045454545454545,
178
+ "loss": 0.0292,
179
+ "step": 1200
180
+ },
181
+ {
182
+ "epoch": 599.8,
183
+ "eval_loss": 6.983569145202637,
184
+ "eval_runtime": 1.6667,
185
+ "eval_samples_per_second": 32.399,
186
+ "eval_steps_per_second": 4.2,
187
+ "eval_wer": 1.0579710144927537,
188
+ "step": 1200
189
+ },
190
+ {
191
+ "epoch": 649.8,
192
+ "learning_rate": 8.318181818181818e-05,
193
+ "loss": 0.0262,
194
+ "step": 1300
195
+ },
196
+ {
197
+ "epoch": 649.8,
198
+ "eval_loss": 7.255490779876709,
199
+ "eval_runtime": 1.6834,
200
+ "eval_samples_per_second": 32.077,
201
+ "eval_steps_per_second": 4.158,
202
+ "eval_wer": 1.0579710144927537,
203
+ "step": 1300
204
+ },
205
+ {
206
+ "epoch": 699.8,
207
+ "learning_rate": 5.590909090909091e-05,
208
+ "loss": 0.0217,
209
+ "step": 1400
210
+ },
211
+ {
212
+ "epoch": 699.8,
213
+ "eval_loss": 7.430310249328613,
214
+ "eval_runtime": 1.6718,
215
+ "eval_samples_per_second": 32.3,
216
+ "eval_steps_per_second": 4.187,
217
+ "eval_wer": 1.1014492753623188,
218
+ "step": 1400
219
+ },
220
+ {
221
+ "epoch": 749.8,
222
+ "learning_rate": 2.8636363636363634e-05,
223
+ "loss": 0.016,
224
+ "step": 1500
225
+ },
226
+ {
227
+ "epoch": 749.8,
228
+ "eval_loss": 7.406508922576904,
229
+ "eval_runtime": 1.6654,
230
+ "eval_samples_per_second": 32.424,
231
+ "eval_steps_per_second": 4.203,
232
+ "eval_wer": 1.0724637681159421,
233
+ "step": 1500
234
+ },
235
+ {
236
+ "epoch": 799.8,
237
+ "learning_rate": 1.3636363636363634e-06,
238
+ "loss": 0.0134,
239
+ "step": 1600
240
+ },
241
+ {
242
+ "epoch": 799.8,
243
+ "eval_loss": 7.357884883880615,
244
+ "eval_runtime": 1.6966,
245
+ "eval_samples_per_second": 31.829,
246
+ "eval_steps_per_second": 4.126,
247
+ "eval_wer": 1.0579710144927537,
248
+ "step": 1600
249
+ }
250
+ ],
251
+ "max_steps": 1600,
252
+ "num_train_epochs": 800,
253
+ "total_flos": 6.25600073006039e+18,
254
+ "trial_name": null,
255
+ "trial_params": null
256
+ }
xlsr-d/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05859f1c655e7fffe23fe9a1ab4f91485812cdd7671be262f9f59334f32510e7
3
+ size 2799