pplantinga committed
Commit 0e947f2 · Parent: 5738ab4

Update model to latest version

Files changed (2):
  1. enhance_model.ckpt +2 -2
  2. hyperparams.yaml +10 -56
enhance_model.ckpt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eea2ed64b9b136ccfa66741860d47b4a3ea6954bb8eb07d3212a14b601a0d3fb
-size 29005818
+oid sha256:348bdc866632457e60d9eea38aa9a511910b89cd0c1ad1b78c229535bd5b60e6
+size 89230845
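
The checkpoint roughly triples in size (about 29 MB to about 89 MB), consistent with the switch to the larger EnhanceResnet configured below. A minimal sketch for fetching the updated weights and checking them against the new LFS pointer; the repo id is a placeholder and the use of huggingface_hub here is an assumption, not part of this commit:

# Sketch: download the updated checkpoint and verify it against the new
# LFS pointer (oid sha256:348bdc86..., size 89230845 bytes).
# REPO_ID is a hypothetical placeholder -- substitute the actual model repo.
import hashlib

from huggingface_hub import hf_hub_download

REPO_ID = "your-namespace/your-enhancement-model"  # hypothetical
EXPECTED_SHA256 = "348bdc866632457e60d9eea38aa9a511910b89cd0c1ad1b78c229535bd5b60e6"
EXPECTED_SIZE = 89230845  # bytes, from the updated LFS pointer

ckpt_path = hf_hub_download(repo_id=REPO_ID, filename="enhance_model.ckpt")

with open(ckpt_path, "rb") as f:
    data = f.read()

assert len(data) == EXPECTED_SIZE, f"unexpected size: {len(data)}"
assert hashlib.sha256(data).hexdigest() == EXPECTED_SHA256, "checksum mismatch"
print("enhance_model.ckpt matches the updated LFS pointer")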
hyperparams.yaml CHANGED
@@ -4,67 +4,21 @@ n_fft: 512
 win_length: 32
 hop_length: 16
 
-# Enhancement model args
-emb_channels: 1024
-emb_kernel_size: 3
-emb_padding: same
-enhancer_size: 512
-enhancer_layers: 8
-enhancer_heads: 8
-enhancer_causal: False
-enhancer_drop_rate: 0.1
+mask_weight: 0.99
 
-compute_stft: !new:speechbrain.processing.features.STFT
-    sample_rate: !ref <sample_rate>
+# Enhancement model args
+enhance_model: !new:speechbrain.lobes.models.EnhanceResnet.EnhanceResnet
     n_fft: !ref <n_fft>
     win_length: !ref <win_length>
     hop_length: !ref <hop_length>
-
-compute_istft: !new:speechbrain.processing.features.ISTFT
     sample_rate: !ref <sample_rate>
-    n_fft: !ref <n_fft>
-    win_length: !ref <win_length>
-    hop_length: !ref <hop_length>
-
-spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
-    power: 0.5
-
-resynth: !name:speechbrain.processing.signal_processing.resynthesize
-    stft: !ref <compute_stft>
-    istft: !ref <compute_istft>
-
-enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
-    output_size: !ref <n_fft> // 2 + 1
-    d_model: !ref <n_fft> // 2
-    output_activation: !name:torch.nn.ReLU
-    activation: !name:torch.nn.LeakyReLU
-    dropout: !ref <enhancer_drop_rate>
-    num_layers: !ref <enhancer_layers>
-    d_ffn: !ref <enhancer_size>
-    nhead: !ref <enhancer_heads>
-    causal: !ref <enhancer_causal>
-    custom_emb_module: !new:speechbrain.nnet.containers.Sequential
-        input_shape: [null, null, !ref <n_fft> // 2 + 1]
-        conv1: !name:speechbrain.nnet.CNN.Conv1d
-            out_channels: !ref <emb_channels>
-            kernel_size: 3
-        norm1: !name:speechbrain.nnet.normalization.LayerNorm
-        act1: !new:torch.nn.LeakyReLU
-        conv2: !name:speechbrain.nnet.CNN.Conv1d
-            out_channels: !ref <emb_channels> // 2
-            kernel_size: 3
-        norm2: !name:speechbrain.nnet.normalization.LayerNorm
-        act2: !new:torch.nn.LeakyReLU
-        conv3: !name:speechbrain.nnet.CNN.Conv1d
-            out_channels: !ref <emb_channels> // 4
-            kernel_size: 3
-        norm3: !name:speechbrain.nnet.normalization.LayerNorm
-        act3: !new:torch.nn.LeakyReLU
-        conv4: !name:speechbrain.nnet.CNN.Conv1d
-            out_channels: !ref <emb_channels> // 4
-            kernel_size: 3
-        norm4: !name:speechbrain.nnet.normalization.LayerNorm
-        act4: !new:torch.nn.LeakyReLU
+    channel_counts: [128, 128, 256, 256, 512, 512]
+    normalization: !name:speechbrain.nnet.normalization.BatchNorm2d
+    activation: !new:torch.nn.GELU
+    dense_count: 2
+    dense_nodes: 1024
+    dropout: 0.1
+    mask_weight: !ref <mask_weight>
 
 modules:
     enhance_model: !ref <enhance_model>
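
The updated hyperparams replace the CNN-Transformer enhancer and its separate compute_stft / compute_istft / resynth entries with a single EnhanceResnet that handles the STFT settings itself. A minimal sketch for instantiating the updated model from these two files; loading the .ckpt as a plain per-module state_dict and the model's forward() return values are assumptions, not something this commit specifies:

# Sketch: build the updated EnhanceResnet from hyperparams.yaml and load
# the new enhance_model.ckpt. Assumes both files are in the current
# directory and the .ckpt holds a plain state_dict for this one module.
import torch
from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as f:
    hparams = load_hyperpyyaml(f)

# The YAML's !new: tag already constructed the module for us.
model = hparams["enhance_model"]  # speechbrain.lobes.models.EnhanceResnet.EnhanceResnet
state_dict = torch.load("enhance_model.ckpt", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()

# Dummy (batch, time) waveform; use real audio at the configured sample_rate.
# The exact outputs of forward() are an assumption -- check the EnhanceResnet
# docstring in your SpeechBrain version before unpacking them.
noisy = torch.randn(1, 16000)
with torch.no_grad():
    outputs = model(noisy)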