mirco committed
Commit efab5aa
1 Parent(s): 6c312d4

cleaned inference hyperparam file

Files changed (2)
  1. hyperparams.yaml +21 -138
  2. hyperparams_train.yaml +180 -0
hyperparams.yaml CHANGED
@@ -1,111 +1,20 @@
- # Generated 2021-03-29 from:
- # /home/mila/s/subakany/speechbrain_new/recipes/WSJ0Mix/separation/hparams/sepformer-whamr.yaml
- # yamllint disable
  # ################################
- # Model: SepFormer for source separation
  # https://arxiv.org/abs/2010.13154
- #
- # Dataset : WSJ0-2mix and WSJ0-3mix
- # ################################
- # Basic parameters
- # Seed needs to be set at top of yaml, before objects with parameters are made
- #
- seed: 1234
- __set_seed: !apply:torch.manual_seed [1234]
-
- # Data params
-
- # the data folder for the wham dataset
- # needs to end with wham_original for the wham dataset
- # needs to end with wham_reverb for the whamr dataset
- data_folder: /network/tmp1/subakany/whamr
-
- # the path for wsj0/si_tr_s/ folder -- only needed if dynamic mixing is used
- # e.g. /yourpath/wsj0-processed/si_tr_s/
- wsj0_tr: /yourpath/wsj0-processed/si_tr_s/
-
- experiment_name: sepformer-whamr
- output_folder: results/sepformer-whamr/1234
- train_log: results/sepformer-whamr/1234/train_log.txt
- save_folder: results/sepformer-whamr/1234/save
-
- # the file names should start with whamr instead of whamorg
- train_data: results/sepformer-whamr/1234/save/whamr_tr.csv
- valid_data: results/sepformer-whamr/1234/save/whamr_cv.csv
- test_data: results/sepformer-whamr/1234/save/whamr_tt.csv
- skip_prep: false

-
- # Experiment params
- auto_mix_prec: false # Set it to True for mixed precision
- test_only: false
- num_spks: 2 # set to 3 for wsj0-3mix
- progressbar: true
- save_audio: false # Save estimated sources on disk
  sample_rate: 8000
- n_audio_to_save: 20
-
- # Training parameters
- N_epochs: 200
- batch_size: 1
- lr: 0.00015
- clip_grad_norm: 5
- loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
- # if True, the training sequences are cut to a specified length
- limit_training_signal_len: false
- # this is the length of sequences if we choose to limit
- # the signal length of training sequences
- training_signal_len: 32000000
-
- # Set it to True to dynamically create mixtures at training time
- dynamic_mixing: false
-
- # Parameters for data augmentation
- use_wavedrop: false
- use_speedperturb: true
- use_speedperturb_sameforeachsource: false
- use_rand_shift: false
- min_shift: -8000
- max_shift: 8000
-
- speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
- perturb_prob: 1.0
- drop_freq_prob: 0.0
- drop_chunk_prob: 0.0
- sample_rate: 8000
- speeds: [95, 100, 105]
-
- wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
- perturb_prob: 0.0
- drop_freq_prob: 1.0
- drop_chunk_prob: 1.0
- sample_rate: 8000
-
- reverb_style: sorted
-
- # loss thresholding -- this thresholds the training loss
- threshold_byloss: true
- threshold: -30
-
- # Encoder parameters
- N_encoder_out: 256
- out_channels: 256
- kernel_size: 16
- kernel_stride: 8
-
- # Dataloader options
- dataloader_opts:
- batch_size: 1
- num_workers: 3
-

  # Specifying the network
- Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
  kernel_size: 16
  out_channels: 256

-
- SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  num_layers: 8
  d_model: 256
  nhead: 8
@@ -114,7 +23,7 @@ SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  use_positional_encoding: true
  norm_before: true

- SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  num_layers: 8
  d_model: 256
  nhead: 8
@@ -123,58 +32,32 @@ SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  use_positional_encoding: true
  norm_before: true

- MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
-
- num_spks: 2
  in_channels: 256
  out_channels: 256
  num_layers: 2
  K: 250
- intra_model: *id001
- inter_model: *id002
  norm: ln
  linear_layer_after_inter_intra: false
  skip_around_intra: true

- Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
  in_channels: 256
  out_channels: 1
  kernel_size: 16
  stride: 8
  bias: false

- optimizer: !name:torch.optim.Adam
- lr: 0.00015
- weight_decay: 0
-
- loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
-
- lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
-
- factor: 0.5
- patience: 2
- dont_halve_until_epoch: 85
-
- epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
- limit: 200
-
  modules:
- encoder: *id003
- decoder: *id004
- masknet: *id005
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
- checkpoints_dir: results/sepformer-whamr/1234/save
- recoverables:
- encoder: *id003
- decoder: *id004
- masknet: *id005
- counter: *id006
- lr_scheduler: *id007
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
- save_file: results/sepformer-whamr/1234/train_log.txt

  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
- loadables:
- masknet: !ref <MaskNet>
- encoder: !ref <Encoder>
- decoder: !ref <Decoder>
  # ################################
+ # Model: Inference for source separation with SepFormer
  # https://arxiv.org/abs/2010.13154
+ # Generated from speechbrain/recipes/WSJ0Mix/separation/train/hparams/sepformer-whamr.yaml
+ # Dataset : Whamr
+ # ###############################

+ # Parameters
  sample_rate: 8000
+ num_spks: 2

  # Specifying the network
+ Encoder: !new:speechbrain.lobes.models.dual_path.Encoder
  kernel_size: 16
  out_channels: 256

+ SBtfintra: !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  num_layers: 8
  d_model: 256
  nhead: 8
  use_positional_encoding: true
  norm_before: true

+ SBtfinter: !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  num_layers: 8
  d_model: 256
  nhead: 8
  use_positional_encoding: true
  norm_before: true

+ MaskNet: !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
+ num_spks: !ref <num_spks>
  in_channels: 256
  out_channels: 256
  num_layers: 2
  K: 250
+ intra_model: !ref <SBtfintra>
+ inter_model: !ref <SBtfinter>
  norm: ln
  linear_layer_after_inter_intra: false
  skip_around_intra: true

+ Decoder: !new:speechbrain.lobes.models.dual_path.Decoder
  in_channels: 256
  out_channels: 1
  kernel_size: 16
  stride: 8
  bias: false

  modules:
+ encoder: !ref <Encoder>
+ decoder: !ref <Decoder>
+ masknet: !ref <MaskNet>

  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+ loadables:
+ masknet: !ref <MaskNet>
+ encoder: !ref <Encoder>
+ decoder: !ref <Decoder>
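With the training-only entries removed, the cleaned hyperparams.yaml defines only what inference needs: the Encoder, SBtfintra/SBtfinter, MaskNet, and Decoder, the modules mapping, and a Pretrainer that pulls in the pretrained weights. A minimal usage sketch follows; the SpeechBrain pretrained interface shown, the repo id "speechbrain/sepformer-whamr", and the audio file names are illustrative assumptions, not part of this commit.

```python
# Minimal sketch (assumptions: SpeechBrain 0.5.x-era pretrained API, torchaudio installed,
# "speechbrain/sepformer-whamr" as the repo/folder holding this hyperparams.yaml and its
# checkpoints, and placeholder audio paths).
import torchaudio
from speechbrain.pretrained import SepformerSeparation

separator = SepformerSeparation.from_hparams(
    source="speechbrain/sepformer-whamr",          # directory or Hub repo with hyperparams.yaml
    savedir="pretrained_models/sepformer-whamr",   # local cache for the fetched files
)

# The model runs at the 8 kHz sample_rate declared above and returns
# est_sources with shape [batch, time, num_spks].
est_sources = separator.separate_file(path="mixture_8k.wav")

for spk in range(est_sources.shape[-1]):
    torchaudio.save(f"source{spk + 1}hat.wav", est_sources[:, :, spk].detach().cpu(), 8000)
```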
hyperparams_train.yaml ADDED
@@ -0,0 +1,180 @@
+ # Generated 2021-03-29 from:
+ # /home/mila/s/subakany/speechbrain_new/recipes/WSJ0Mix/separation/hparams/sepformer-whamr.yaml
+ # yamllint disable
+ # ################################
+ # Model: SepFormer for source separation
+ # https://arxiv.org/abs/2010.13154
+ #
+ # Dataset : WSJ0-2mix and WSJ0-3mix
+ # ################################
+ # Basic parameters
+ # Seed needs to be set at top of yaml, before objects with parameters are made
+ #
+ seed: 1234
+ __set_seed: !apply:torch.manual_seed [1234]
+
+ # Data params
+
+ # the data folder for the wham dataset
+ # needs to end with wham_original for the wham dataset
+ # needs to end with wham_reverb for the whamr dataset
+ data_folder: /network/tmp1/subakany/whamr
+
+ # the path for wsj0/si_tr_s/ folder -- only needed if dynamic mixing is used
+ # e.g. /yourpath/wsj0-processed/si_tr_s/
+ wsj0_tr: /yourpath/wsj0-processed/si_tr_s/
+
+ experiment_name: sepformer-whamr
+ output_folder: results/sepformer-whamr/1234
+ train_log: results/sepformer-whamr/1234/train_log.txt
+ save_folder: results/sepformer-whamr/1234/save
+
+ # the file names should start with whamr instead of whamorg
+ train_data: results/sepformer-whamr/1234/save/whamr_tr.csv
+ valid_data: results/sepformer-whamr/1234/save/whamr_cv.csv
+ test_data: results/sepformer-whamr/1234/save/whamr_tt.csv
+ skip_prep: false
+
+
+ # Experiment params
+ auto_mix_prec: false # Set it to True for mixed precision
+ test_only: false
+ num_spks: 2 # set to 3 for wsj0-3mix
+ progressbar: true
+ save_audio: false # Save estimated sources on disk
+ sample_rate: 8000
+ n_audio_to_save: 20
+
+ # Training parameters
+ N_epochs: 200
+ batch_size: 1
+ lr: 0.00015
+ clip_grad_norm: 5
+ loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
+ # if True, the training sequences are cut to a specified length
+ limit_training_signal_len: false
+ # this is the length of sequences if we choose to limit
+ # the signal length of training sequences
+ training_signal_len: 32000000
+
+ # Set it to True to dynamically create mixtures at training time
+ dynamic_mixing: false
+
+ # Parameters for data augmentation
+ use_wavedrop: false
+ use_speedperturb: true
+ use_speedperturb_sameforeachsource: false
+ use_rand_shift: false
+ min_shift: -8000
+ max_shift: 8000
+
+ speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
+ perturb_prob: 1.0
+ drop_freq_prob: 0.0
+ drop_chunk_prob: 0.0
+ sample_rate: 8000
+ speeds: [95, 100, 105]
+
+ wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
+ perturb_prob: 0.0
+ drop_freq_prob: 1.0
+ drop_chunk_prob: 1.0
+ sample_rate: 8000
+
+ reverb_style: sorted
+
+ # loss thresholding -- this thresholds the training loss
+ threshold_byloss: true
+ threshold: -30
+
+ # Encoder parameters
+ N_encoder_out: 256
+ out_channels: 256
+ kernel_size: 16
+ kernel_stride: 8
+
+ # Dataloader options
+ dataloader_opts:
+ batch_size: 1
+ num_workers: 3
+
+
+ # Specifying the network
+ Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
+ kernel_size: 16
+ out_channels: 256
+
+
+ SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
+ num_layers: 8
+ d_model: 256
+ nhead: 8
+ d_ffn: 1024
+ dropout: 0
+ use_positional_encoding: true
+ norm_before: true
+
+ SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
+ num_layers: 8
+ d_model: 256
+ nhead: 8
+ d_ffn: 1024
+ dropout: 0
+ use_positional_encoding: true
+ norm_before: true
+
+ MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
+
+ num_spks: 2
+ in_channels: 256
+ out_channels: 256
+ num_layers: 2
+ K: 250
+ intra_model: *id001
+ inter_model: *id002
+ norm: ln
+ linear_layer_after_inter_intra: false
+ skip_around_intra: true
+
+ Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
+ in_channels: 256
+ out_channels: 1
+ kernel_size: 16
+ stride: 8
+ bias: false
+
+ optimizer: !name:torch.optim.Adam
+ lr: 0.00015
+ weight_decay: 0
+
+ loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
+
+ lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
+
+ factor: 0.5
+ patience: 2
+ dont_halve_until_epoch: 85
+
+ epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
+ limit: 200
+
+ modules:
+ encoder: *id003
+ decoder: *id004
+ masknet: *id005
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
+ checkpoints_dir: results/sepformer-whamr/1234/save
+ recoverables:
+ encoder: *id003
+ decoder: *id004
+ masknet: *id005
+ counter: *id006
+ lr_scheduler: *id007
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
+ save_file: results/sepformer-whamr/1234/train_log.txt
+
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+ loadables:
+ masknet: !ref <MaskNet>
+ encoder: !ref <Encoder>
+ decoder: !ref <Decoder>
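The added hyperparams_train.yaml keeps the full training recipe configuration (data paths, augmentation, optimizer, scheduler, checkpointer) for reference. In the SpeechBrain WSJ0Mix recipe, a file like this is consumed roughly as sketched below; the command-line override value and the Brain-class call in the comments are illustrative assumptions rather than something defined in this file.

```python
# Sketch of how a SpeechBrain recipe typically loads a file like hyperparams_train.yaml
# (the --data_folder value and the commented Brain-class usage are placeholders).
import sys
import speechbrain as sb
from hyperpyyaml import load_hyperpyyaml

# e.g. python train.py hyperparams_train.yaml --data_folder=/path/to/whamr
hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])

with open(hparams_file) as fin:
    # Instantiates every !new: object (Encoder, MaskNet, optimizer, checkpointer, ...)
    # and applies command-line overrides such as data_folder or batch_size.
    hparams = load_hyperpyyaml(fin, overrides)

# The instantiated pieces are then handed to the recipe's Brain subclass, e.g.:
# separator = Separation(modules=hparams["modules"], opt_class=hparams["optimizer"],
#                        hparams=hparams, run_opts=run_opts,
#                        checkpointer=hparams["checkpointer"])
# separator.fit(hparams["epoch_counter"], train_data, valid_data,
#               train_loader_kwargs=hparams["dataloader_opts"])
```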