KB committed on
Commit
012a85c
1 Parent(s): 8551dbc
config.json ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "_name_or_path": "./voxrex-bert-swedish",
4
+ "architectures": [
5
+ "SpeechEncoderDecoderModel"
6
+ ],
7
+ "decoder": {
8
+ "_name_or_path": "KBLab/megatron-bert-large-swedish-cased-165k",
9
+ "add_cross_attention": true,
10
+ "architectures": [
11
+ "MegatronBertForMaskedLM"
12
+ ],
13
+ "attention_probs_dropout_prob": 0.1,
14
+ "bad_words_ids": null,
15
+ "begin_suppress_tokens": null,
16
+ "bos_token_id": 3,
17
+ "chunk_size_feed_forward": 0,
18
+ "cross_attention_hidden_size": null,
19
+ "decoder_start_token_id": null,
20
+ "diversity_penalty": 0.0,
21
+ "do_sample": false,
22
+ "early_stopping": false,
23
+ "encoder_no_repeat_ngram_size": 0,
24
+ "eos_token_id": 4,
25
+ "exponential_decay_length_penalty": null,
26
+ "finetuning_task": null,
27
+ "forced_bos_token_id": null,
28
+ "forced_eos_token_id": null,
29
+ "hidden_act": "gelu",
30
+ "hidden_dropout_prob": 0.1,
31
+ "hidden_size": 1024,
32
+ "id2label": {
33
+ "0": "LABEL_0",
34
+ "1": "LABEL_1"
35
+ },
36
+ "initializer_range": 0.02,
37
+ "intermediate_size": 4096,
38
+ "is_decoder": true,
39
+ "is_encoder_decoder": false,
40
+ "label2id": {
41
+ "LABEL_0": 0,
42
+ "LABEL_1": 1
43
+ },
44
+ "layer_norm_eps": 1e-12,
45
+ "length_penalty": 1.0,
46
+ "max_length": 20,
47
+ "max_position_embeddings": 512,
48
+ "min_length": 0,
49
+ "model_type": "megatron-bert",
50
+ "no_repeat_ngram_size": 0,
51
+ "num_attention_heads": 16,
52
+ "num_beam_groups": 1,
53
+ "num_beams": 1,
54
+ "num_hidden_layers": 24,
55
+ "num_return_sequences": 1,
56
+ "output_attentions": false,
57
+ "output_hidden_states": false,
58
+ "output_scores": false,
59
+ "pad_token_id": 0,
60
+ "position_embedding_type": "absolute",
61
+ "prefix": null,
62
+ "problem_type": null,
63
+ "pruned_heads": {},
64
+ "remove_invalid_values": false,
65
+ "repetition_penalty": 1.0,
66
+ "return_dict": true,
67
+ "return_dict_in_generate": false,
68
+ "sep_token_id": null,
69
+ "suppress_tokens": null,
70
+ "task_specific_params": null,
71
+ "temperature": 1.0,
72
+ "tf_legacy_loss": false,
73
+ "tie_encoder_decoder": false,
74
+ "tie_word_embeddings": true,
75
+ "tokenizer_class": null,
76
+ "tokenizer_type": "BertWordPieceCase",
77
+ "top_k": 50,
78
+ "top_p": 1.0,
79
+ "torch_dtype": "float32",
80
+ "torchscript": false,
81
+ "transformers_version": "4.28.0.dev0",
82
+ "type_vocab_size": 2,
83
+ "typical_p": 1.0,
84
+ "use_bfloat16": false,
85
+ "use_cache": true,
86
+ "vocab_size": 64128
87
+ },
88
+ "decoder_start_token_id": 3,
89
+ "encoder": {
90
+ "_name_or_path": "KBLab/wav2vec2-large-voxrex",
91
+ "activation_dropout": 0.0,
92
+ "adapter_kernel_size": 3,
93
+ "adapter_stride": 2,
94
+ "add_adapter": true,
95
+ "add_cross_attention": false,
96
+ "apply_spec_augment": true,
97
+ "architectures": [
98
+ "Wav2Vec2ForPreTraining"
99
+ ],
100
+ "attention_dropout": 0.1,
101
+ "bad_words_ids": null,
102
+ "begin_suppress_tokens": null,
103
+ "bos_token_id": 1,
104
+ "chunk_size_feed_forward": 0,
105
+ "classifier_proj_size": 256,
106
+ "codevector_dim": 768,
107
+ "contrastive_logits_temperature": 0.1,
108
+ "conv_bias": true,
109
+ "conv_dim": [
110
+ 512,
111
+ 512,
112
+ 512,
113
+ 512,
114
+ 512,
115
+ 512,
116
+ 512
117
+ ],
118
+ "conv_kernel": [
119
+ 10,
120
+ 3,
121
+ 3,
122
+ 3,
123
+ 3,
124
+ 2,
125
+ 2
126
+ ],
127
+ "conv_stride": [
128
+ 5,
129
+ 2,
130
+ 2,
131
+ 2,
132
+ 2,
133
+ 2,
134
+ 2
135
+ ],
136
+ "cross_attention_hidden_size": null,
137
+ "ctc_loss_reduction": "sum",
138
+ "ctc_zero_infinity": false,
139
+ "decoder_start_token_id": null,
140
+ "diversity_loss_weight": 0.1,
141
+ "diversity_penalty": 0.0,
142
+ "do_sample": false,
143
+ "do_stable_layer_norm": true,
144
+ "early_stopping": false,
145
+ "encoder_no_repeat_ngram_size": 0,
146
+ "eos_token_id": 2,
147
+ "exponential_decay_length_penalty": null,
148
+ "feat_extract_activation": "gelu",
149
+ "feat_extract_dropout": 0.0,
150
+ "feat_extract_norm": "layer",
151
+ "feat_proj_dropout": 0.0,
152
+ "feat_quantizer_dropout": 0.0,
153
+ "final_dropout": 0.0,
154
+ "finetuning_task": null,
155
+ "forced_bos_token_id": null,
156
+ "forced_eos_token_id": null,
157
+ "gradient_checkpointing": false,
158
+ "hidden_act": "gelu",
159
+ "hidden_dropout": 0.1,
160
+ "hidden_size": 1024,
161
+ "id2label": {
162
+ "0": "LABEL_0",
163
+ "1": "LABEL_1"
164
+ },
165
+ "initializer_range": 0.02,
166
+ "intermediate_size": 4096,
167
+ "is_decoder": false,
168
+ "is_encoder_decoder": false,
169
+ "label2id": {
170
+ "LABEL_0": 0,
171
+ "LABEL_1": 1
172
+ },
173
+ "layer_norm_eps": 1e-05,
174
+ "layerdrop": 0.0,
175
+ "length_penalty": 1.0,
176
+ "mask_channel_length": 10,
177
+ "mask_channel_min_space": 1,
178
+ "mask_channel_other": 0.0,
179
+ "mask_channel_prob": 0.0,
180
+ "mask_channel_selection": "static",
181
+ "mask_feature_length": 10,
182
+ "mask_feature_min_masks": 0,
183
+ "mask_feature_prob": 0.0,
184
+ "mask_time_length": 10,
185
+ "mask_time_min_masks": 2,
186
+ "mask_time_min_space": 1,
187
+ "mask_time_other": 0.0,
188
+ "mask_time_prob": 0.075,
189
+ "mask_time_selection": "static",
190
+ "max_length": 20,
191
+ "min_length": 0,
192
+ "model_type": "wav2vec2",
193
+ "no_repeat_ngram_size": 0,
194
+ "num_adapter_layers": 3,
195
+ "num_attention_heads": 16,
196
+ "num_beam_groups": 1,
197
+ "num_beams": 1,
198
+ "num_codevector_groups": 2,
199
+ "num_codevectors_per_group": 320,
200
+ "num_conv_pos_embedding_groups": 16,
201
+ "num_conv_pos_embeddings": 128,
202
+ "num_feat_extract_layers": 7,
203
+ "num_hidden_layers": 24,
204
+ "num_negatives": 100,
205
+ "num_return_sequences": 1,
206
+ "output_attentions": false,
207
+ "output_hidden_size": 1024,
208
+ "output_hidden_states": false,
209
+ "output_scores": false,
210
+ "pad_token_id": 0,
211
+ "prefix": null,
212
+ "problem_type": null,
213
+ "proj_codevector_dim": 768,
214
+ "pruned_heads": {},
215
+ "remove_invalid_values": false,
216
+ "repetition_penalty": 1.0,
217
+ "return_dict": true,
218
+ "return_dict_in_generate": false,
219
+ "sep_token_id": null,
220
+ "suppress_tokens": null,
221
+ "task_specific_params": null,
222
+ "tdnn_dilation": [
223
+ 1,
224
+ 2,
225
+ 3,
226
+ 1,
227
+ 1
228
+ ],
229
+ "tdnn_dim": [
230
+ 512,
231
+ 512,
232
+ 512,
233
+ 512,
234
+ 1500
235
+ ],
236
+ "tdnn_kernel": [
237
+ 5,
238
+ 3,
239
+ 3,
240
+ 1,
241
+ 1
242
+ ],
243
+ "temperature": 1.0,
244
+ "tf_legacy_loss": false,
245
+ "tie_encoder_decoder": false,
246
+ "tie_word_embeddings": true,
247
+ "tokenizer_class": null,
248
+ "top_k": 50,
249
+ "top_p": 1.0,
250
+ "torch_dtype": null,
251
+ "torchscript": false,
252
+ "transformers_version": "4.28.0.dev0",
253
+ "typical_p": 1.0,
254
+ "use_bfloat16": false,
255
+ "use_weighted_layer_sum": false,
256
+ "vocab_size": 32,
257
+ "xvector_output_dim": 512
258
+ },
259
+ "eos_token_id": 4,
260
+ "forced_decoder_ids": null,
261
+ "is_encoder_decoder": true,
262
+ "model_type": "speech-encoder-decoder",
263
+ "pad_token_id": 0,
264
+ "tie_word_embeddings": false,
265
+ "torch_dtype": "float32",
266
+ "transformers_version": null
267
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
runs/Mar21_21-04-53_97056842a4da/1679432772.0134842/events.out.tfevents.1679432772.97056842a4da.140185.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303e7ab047cea152ac94da6b767ff0a126b72b442180e67633df70dac9953a57
3
+ size 6021
runs/Mar21_21-04-53_97056842a4da/events.out.tfevents.1679432772.97056842a4da.140185.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4055dccd6fa723b4371a673b97c6ae6123cea4e2fa69b65b41e6fb2260cce164
3
+ size 36816
runs/Mar22_06-57-48_b512f72e3704/1679468348.5537732/events.out.tfevents.1679468348.b512f72e3704.5934.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:791b15ffc40f02d97b52e28faad44b6f3876255b331cd9348782753e61554775
3
+ size 6021
runs/Mar22_06-57-48_b512f72e3704/events.out.tfevents.1679468348.b512f72e3704.5934.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f794795ef5462821af268c51ee023fe75b127671aec47d1257fd8c35266351f
3
+ size 10597
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "model_max_length": 1000000000000000019884624838656,
3
+ "processor_class": "Wav2Vec2Processor",
4
+ "special_tokens_map_file": "pretrained_model_hf_large_165K/special_tokens_map.json",
5
+ "tokenizer_class": "PreTrainedTokenizerFast"
6
+ }