yangwang825 committed on
Commit
6a1d27a
1 Parent(s): d4e8ed4

Upload config

Browse files
Files changed (2) hide show
  1. config.json +4 -7
  2. configuration_wav2vec2_spkreg.py +2 -0
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-base",
3
  "activation_dropout": 0.0,
4
  "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
@@ -7,14 +6,11 @@
7
  "add_adapter": false,
8
  "apply_spec_augment": true,
9
  "architectures": [
10
- "Wav2Vec2SpkRegModel"
11
  ],
12
  "attention_dropout": 0.1,
13
  "auto_map": {
14
- "AutoConfig": "configuration_wav2vec2_spkreg.Wav2Vec2SpkRegConfig",
15
- "AutoFeatureExtractor": "feature_extractor_wav2vec2_spkreg.Wav2Vec2SpkRegFeatureExtractor",
16
- "AutoModel": "modeling_wav2vec2_spkreg.Wav2Vec2SpkRegModel",
17
- "AutoModelForAudioClassification": "modeling_wav2vec2_spkreg.Wav2Vec2SpkRegForSequenceClassification"
18
  },
19
  "bos_token_id": 1,
20
  "classifier_proj_size": 256,
@@ -60,6 +56,7 @@
60
  "feat_quantizer_dropout": 0.0,
61
  "final_dropout": 0.0,
62
  "freeze_feat_extract_train": true,
 
63
  "hidden_act": "gelu",
64
  "hidden_dropout": 0.1,
65
  "hidden_size": 768,
@@ -101,6 +98,7 @@
101
  "proj_codevector_dim": 256,
102
  "reduction": "mean",
103
  "scale": 30.0,
 
104
  "tdnn_dilation": [
105
  1,
106
  2,
@@ -122,7 +120,6 @@
122
  1,
123
  1
124
  ],
125
- "torch_dtype": "float32",
126
  "transformers_version": "4.46.2",
127
  "use_weighted_layer_sum": false,
128
  "vocab_size": 32,
 
1
  {
 
2
  "activation_dropout": 0.0,
3
  "adapter_attn_dim": null,
4
  "adapter_kernel_size": 3,
 
6
  "add_adapter": false,
7
  "apply_spec_augment": true,
8
  "architectures": [
9
+ "Wav2Vec2ForPreTraining"
10
  ],
11
  "attention_dropout": 0.1,
12
  "auto_map": {
13
+ "AutoConfig": "configuration_wav2vec2_spkreg.Wav2Vec2SpkRegConfig"
 
 
 
14
  },
15
  "bos_token_id": 1,
16
  "classifier_proj_size": 256,
 
56
  "feat_quantizer_dropout": 0.0,
57
  "final_dropout": 0.0,
58
  "freeze_feat_extract_train": true,
59
+ "gradient_checkpointing": true,
60
  "hidden_act": "gelu",
61
  "hidden_dropout": 0.1,
62
  "hidden_size": 768,
 
98
  "proj_codevector_dim": 256,
99
  "reduction": "mean",
100
  "scale": 30.0,
101
+ "statistic_pooling": false,
102
  "tdnn_dilation": [
103
  1,
104
  2,
 
120
  1,
121
  1
122
  ],
 
123
  "transformers_version": "4.46.2",
124
  "use_weighted_layer_sum": false,
125
  "vocab_size": 32,
configuration_wav2vec2_spkreg.py CHANGED
@@ -244,6 +244,7 @@ class Wav2Vec2SpkRegConfig(PretrainedConfig):
244
  num_adapter_layers=3,
245
  output_hidden_size=None,
246
  adapter_attn_dim=None,
 
247
  loss_fct: str = 'cross_entropy', # cross_entropy, additive_margin, additive_angular_margin
248
  label_smoothing: float = 0.0,
249
  scale: float = 30.0,
@@ -332,6 +333,7 @@ class Wav2Vec2SpkRegConfig(PretrainedConfig):
332
  self.xvector_output_dim = xvector_output_dim
333
 
334
  # Loss function parameters. Feel free to ignore for other classes.
 
335
  self.loss_fct = loss_fct
336
  self.label_smoothing = label_smoothing
337
  self.scale = scale
 
244
  num_adapter_layers=3,
245
  output_hidden_size=None,
246
  adapter_attn_dim=None,
247
+ statistic_pooling: bool = False,
248
  loss_fct: str = 'cross_entropy', # cross_entropy, additive_margin, additive_angular_margin
249
  label_smoothing: float = 0.0,
250
  scale: float = 30.0,
 
333
  self.xvector_output_dim = xvector_output_dim
334
 
335
  # Loss function parameters. Feel free to ignore for other classes.
336
+ self.statistic_pooling = statistic_pooling
337
  self.loss_fct = loss_fct
338
  self.label_smoothing = label_smoothing
339
  self.scale = scale