pplantinga committed on
Commit
f79a1ca
1 Parent(s): b9c9e2d

Add hparams and update ckpt to correspond to it

Browse files
Files changed (2) hide show
  1. enhance_model.ckpt +2 -2
  2. hyperparams.yaml +74 -0
enhance_model.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02486391563e41215feee6aaae0ebf3cbfbf27dcdce08b913201749e3f28c708
3
- size 29013562
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea2ed64b9b136ccfa66741860d47b4a3ea6954bb8eb07d3212a14b601a0d3fb
3
+ size 29005818
hyperparams.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # STFT arguments
2
+ sample_rate: 16000
3
+ n_fft: 512
4
+ win_length: 32
5
+ hop_length: 16
6
+
7
+ # Enhancement model args
8
+ emb_channels: 1024
9
+ emb_kernel_size: 3
10
+ emb_padding: same
11
+ enhancer_size: 512
12
+ enhancer_layers: 8
13
+ enhancer_heads: 8
14
+ enhancer_causal: False
15
+ enhancer_drop_rate: 0.1
16
+
17
+ compute_stft: !new:speechbrain.processing.features.STFT
18
+ sample_rate: !ref <sample_rate>
19
+ n_fft: !ref <n_fft>
20
+ win_length: !ref <win_length>
21
+ hop_length: !ref <hop_length>
22
+
23
+ compute_istft: !new:speechbrain.processing.features.ISTFT
24
+ sample_rate: !ref <sample_rate>
25
+ n_fft: !ref <n_fft>
26
+ win_length: !ref <win_length>
27
+ hop_length: !ref <hop_length>
28
+
29
+ spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
30
+ power: 0.5
31
+
32
+ resynth: !name:speechbrain.processing.signal_processing.resynthesize
33
+ stft: !ref <compute_stft>
34
+ istft: !ref <compute_istft>
35
+
36
+ enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
37
+ output_size: !ref <n_fft> // 2 + 1
38
+ d_model: !ref <n_fft> // 2
39
+ output_activation: !name:torch.nn.ReLU
40
+ activation: !name:torch.nn.LeakyReLU
41
+ dropout: !ref <enhancer_drop_rate>
42
+ num_layers: !ref <enhancer_layers>
43
+ d_ffn: !ref <enhancer_size>
44
+ nhead: !ref <enhancer_heads>
45
+ causal: !ref <enhancer_causal>
46
+ custom_emb_module: !new:speechbrain.nnet.containers.Sequential
47
+ input_shape: [null, null, !ref <n_fft> // 2 + 1]
48
+ conv1: !name:speechbrain.nnet.CNN.Conv1d
49
+ out_channels: !ref <emb_channels>
50
+ kernel_size: 3
51
+ norm1: !name:speechbrain.nnet.normalization.LayerNorm
52
+ act1: !new:torch.nn.LeakyReLU
53
+ conv2: !name:speechbrain.nnet.CNN.Conv1d
54
+ out_channels: !ref <emb_channels> // 2
55
+ kernel_size: 3
56
+ norm2: !name:speechbrain.nnet.normalization.LayerNorm
57
+ act2: !new:torch.nn.LeakyReLU
58
+ conv3: !name:speechbrain.nnet.CNN.Conv1d
59
+ out_channels: !ref <emb_channels> // 4
60
+ kernel_size: 3
61
+ norm3: !name:speechbrain.nnet.normalization.LayerNorm
62
+ act3: !new:torch.nn.LeakyReLU
63
+ conv4: !name:speechbrain.nnet.CNN.Conv1d
64
+ out_channels: !ref <emb_channels> // 4
65
+ kernel_size: 3
66
+ norm4: !name:speechbrain.nnet.normalization.LayerNorm
67
+ act4: !new:torch.nn.LeakyReLU
68
+
69
+ modules:
70
+ enhance_model: !ref <enhance_model>
71
+
72
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
73
+ loadables:
74
+ enhance_model: !ref <enhance_model>