File size: 2,276 Bytes
e92b35b
3042312
990bfc1
e92b35b
 
 
 
 
d87b10a
e92b35b
 
 
 
990bfc1
e92b35b
990bfc1
e92b35b
990bfc1
 
 
 
 
 
 
e92b35b
 
990bfc1
 
 
 
 
 
 
e92b35b
 
990bfc1
 
 
 
 
 
 
e92b35b
 
 
 
 
 
 
 
 
 
 
 
 
 
990bfc1
e92b35b
990bfc1
e92b35b
 
b6994be
 
 
 
 
990bfc1
e92b35b
990bfc1
e92b35b
 
b6994be
 
990bfc1
b6994be
e92b35b
 
990bfc1
e92b35b
 
990bfc1
 
 
 
 
 
43a3836
990bfc1
e92b35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d87b10a
ba980b2
e92b35b
43a3836
e92b35b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{
  "_name_or_path": "KBLab/wav2vec2-large-voxrex",
  "activation_dropout": 0.1,
  "adapter_kernel_size": 3,
  "adapter_stride": 2,
  "add_adapter": false,
  "apply_spec_augment": true,
  "architectures": [
    "Wav2Vec2ForCTC"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "classifier_proj_size": 256,
  "codevector_dim": 768,
  "contrastive_logits_temperature": 0.1,
  "conv_bias": true,
  "conv_dim": [
    512,
    512,
    512,
    512,
    512,
    512,
    512
  ],
  "conv_kernel": [
    10,
    3,
    3,
    3,
    3,
    2,
    2
  ],
  "conv_stride": [
    5,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "ctc_loss_reduction": "mean",
  "ctc_zero_infinity": false,
  "diversity_loss_weight": 0.1,
  "do_stable_layer_norm": true,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
  "feat_extract_dropout": 0.0,
  "feat_extract_norm": "layer",
  "feat_proj_dropout": 0.0,
  "feat_quantizer_dropout": 0.0,
  "final_dropout": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "layerdrop": 0.0,
  "mask_channel_length": 10,
  "mask_channel_min_space": 1,
  "mask_channel_other": 0.0,
  "mask_channel_prob": 0.0,
  "mask_channel_selection": "static",
  "mask_feature_length": 64,
  "mask_feature_min_masks": 0,
  "mask_feature_prob": 0.25,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_min_space": 1,
  "mask_time_other": 0.0,
  "mask_time_prob": 0.75,
  "mask_time_selection": "static",
  "model_type": "wav2vec2",
  "num_adapter_layers": 3,
  "num_attention_heads": 16,
  "num_codevector_groups": 2,
  "num_codevectors_per_group": 320,
  "num_conv_pos_embedding_groups": 16,
  "num_conv_pos_embeddings": 128,
  "num_feat_extract_layers": 7,
  "num_hidden_layers": 24,
  "num_negatives": 100,
  "output_hidden_size": 1024,
  "pad_token_id": 31,
  "proj_codevector_dim": 768,
  "tdnn_dilation": [
    1,
    2,
    3,
    1,
    1
  ],
  "tdnn_dim": [
    512,
    512,
    512,
    512,
    1500
  ],
  "tdnn_kernel": [
    5,
    3,
    3,
    1,
    1
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.17.0.dev0",
  "use_weighted_layer_sum": false,
  "vocab_size": 34,
  "xvector_output_dim": 512
}