mirco committed on
Commit
acbaae6
1 Parent(s): 7788e58

cleaned inference hyperparam file

Browse files
Files changed (2) hide show
  1. hyperparams.yaml +24 -121
  2. hyperparams_train.yaml +163 -0
hyperparams.yaml CHANGED
@@ -1,93 +1,20 @@
1
- # Generated 2021-03-09 from:
2
- # /home/mila/s/subakany/speechbrain_new/recipes/WSJ0Mix/separation/yamls/dpt-3mix-17.yaml
3
- # yamllint disable
4
  # ################################
5
- # Model: SepFormer for source separation
6
  # https://arxiv.org/abs/2010.13154
7
- #
8
- # Dataset : WSJ0-mix
9
- # ################################
10
- # Basic parameters
11
- # Seed needs to be set at top of yaml, before objects with parameters are made
12
- #
13
- seed: 1234
14
- __set_seed: !apply:torch.manual_seed [1234]
15
-
16
- # Data params
17
- data_folder: /network/tmp1/subakany/wsj0-mix/3speakers # wsj2mix or wsj3mix
18
- experiment_name: 17-augment-3mix-dm
19
- output_folder: results/17-augment-3mix-dm/1234
20
- train_log: results/17-augment-3mix-dm/1234/train_log.txt
21
- save_folder: results/17-augment-3mix-dm/1234/save
22
- train_data: results/17-augment-3mix-dm/1234/save/wsj_tr.csv
23
- valid_data: results/17-augment-3mix-dm/1234/save/wsj_cv.csv
24
- test_data: results/17-augment-3mix-dm/1234/save/wsj_tt.csv
25
- wsj0_tr: /network/tmp1/subakany/wsj0-processed/si_tr_s/
26
- skip_prep: false
27
 
28
- # Experiment params
29
- auto_mix_prec: true
30
- test_only: true
31
- num_spks: 3 # set to 3 for wsj0-3mix
32
- progressbar: true
33
- save_audio: false # Save estimated sources on disk
34
  sample_rate: 8000
 
35
 
36
- # Training parameters
37
- N_epochs: 200
38
- batch_size: 1
39
- lr: 0.00015
40
- clip_grad_norm: 5
41
- loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
42
- # if True, the training sequences are cut to a specified length
43
- limit_training_signal_len: false
44
- # this is the length of sequences if we choose to limit
45
- # the signal length of training sequences
46
- training_signal_len: 32000
47
- dynamic_mixing: regular
48
-
49
- # Augment parameters
50
- use_wavedrop: false
51
- use_speedperturb: true
52
- use_speedperturb_sameforeachsource: false
53
- use_rand_shift: false
54
- min_shift: -8000
55
- max_shift: 8000
56
-
57
- # Neural parameters
58
- N_encoder_out: 256
59
- out_channels: 256
60
- kernel_size: 16
61
- kernel_stride: 8
62
-
63
- threshold_byloss: true
64
- threshold: -30
65
-
66
- # Dataloader options
67
- dataloader_opts:
68
- batch_size: 1
69
- num_workers: 3
70
-
71
- speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
72
- perturb_prob: 1.0
73
- drop_freq_prob: 0.0
74
- drop_chunk_prob: 0.0
75
- sample_rate: 8000
76
- speeds: [95, 100, 105]
77
-
78
- wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
79
- perturb_prob: 0.0
80
- drop_freq_prob: 1.0
81
- drop_chunk_prob: 1.0
82
- sample_rate: 8000
83
-
84
-
85
- Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
86
  kernel_size: 16
87
  out_channels: 256
88
 
89
-
90
- SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
91
  num_layers: 8
92
  d_model: 256
93
  nhead: 8
@@ -96,7 +23,7 @@ SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
96
  use_positional_encoding: true
97
  norm_before: true
98
 
99
- SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
100
  num_layers: 8
101
  d_model: 256
102
  nhead: 8
@@ -105,59 +32,35 @@ SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
105
  use_positional_encoding: true
106
  norm_before: true
107
 
108
- MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
109
-
110
- num_spks: 3
111
  in_channels: 256
112
  out_channels: 256
113
  num_layers: 2
114
  K: 250
115
- intra_model: *id001
116
- inter_model: *id002
117
  norm: ln
118
  linear_layer_after_inter_intra: false
119
  skip_around_intra: true
120
 
121
- Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
122
  in_channels: 256
123
  out_channels: 1
124
  kernel_size: 16
125
  stride: 8
126
  bias: false
127
 
128
- optimizer: !name:torch.optim.Adam
129
- lr: 0.00015
130
- weight_decay: 0
131
-
132
- loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
133
-
134
- lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
135
-
136
- factor: 0.5
137
- patience: 2
138
- dont_halve_until_epoch: 100
139
 
140
- epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
141
- limit: 200
 
 
 
142
 
143
- modules:
144
- encoder: *id003
145
- decoder: *id004
146
- masknet: *id005
147
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
148
- checkpoints_dir: results/17-augment-3mix-dm/1234/save
149
- recoverables:
150
- encoder: *id003
151
- decoder: *id004
152
- masknet: *id005
153
- counter: *id006
154
- lr_scheduler: *id007
155
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
156
- save_file: results/17-augment-3mix-dm/1234/train_log.txt
157
 
158
 
159
- pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
160
- loadables:
161
- masknet: !ref <MaskNet>
162
- encoder: !ref <Encoder>
163
- decoder: !ref <Decoder>
 
 
 
1
  # ################################
2
+ # Model: Inference for source separation with SepFormer
3
  # https://arxiv.org/abs/2010.13154
4
+ # Generated from speechbrain/recipes/WSJ0Mix/separation/train/hparams/sepformer-wsj03mix.yaml
5
+ # Dataset : wsj03mix
6
+ # ###############################
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # Parameters
 
 
 
 
 
9
  sample_rate: 8000
10
+ num_spks: 3
11
 
12
+ # Specifying the network
13
+ Encoder: !new:speechbrain.lobes.models.dual_path.Encoder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  kernel_size: 16
15
  out_channels: 256
16
 
17
+ SBtfintra: !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
 
18
  num_layers: 8
19
  d_model: 256
20
  nhead: 8
23
  use_positional_encoding: true
24
  norm_before: true
25
 
26
+ SBtfinter: !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
27
  num_layers: 8
28
  d_model: 256
29
  nhead: 8
32
  use_positional_encoding: true
33
  norm_before: true
34
 
35
+ MaskNet: !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
36
+ num_spks: !ref <num_spks>
 
37
  in_channels: 256
38
  out_channels: 256
39
  num_layers: 2
40
  K: 250
41
+ intra_model: !ref <SBtfintra>
42
+ inter_model: !ref <SBtfinter>
43
  norm: ln
44
  linear_layer_after_inter_intra: false
45
  skip_around_intra: true
46
 
47
+ Decoder: !new:speechbrain.lobes.models.dual_path.Decoder
48
  in_channels: 256
49
  out_channels: 1
50
  kernel_size: 16
51
  stride: 8
52
  bias: false
53
 
54
+ modules:
55
+ encoder: !ref <Encoder>
56
+ decoder: !ref <Decoder>
57
+ masknet: !ref <MaskNet>
 
 
 
 
 
 
 
58
 
59
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
60
+ loadables:
61
+ masknet: !ref <MaskNet>
62
+ encoder: !ref <Encoder>
63
+ decoder: !ref <Decoder>
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
 
 
 
 
 
hyperparams_train.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated 2021-03-09 from:
2
+ # /home/mila/s/subakany/speechbrain_new/recipes/WSJ0Mix/separation/yamls/dpt-3mix-17.yaml
3
+ # yamllint disable
4
+ # ################################
5
+ # Model: SepFormer for source separation
6
+ # https://arxiv.org/abs/2010.13154
7
+ #
8
+ # Dataset : WSJ0-mix
9
+ # ################################
10
+ # Basic parameters
11
+ # Seed needs to be set at top of yaml, before objects with parameters are made
12
+ #
13
+ seed: 1234
14
+ __set_seed: !apply:torch.manual_seed [1234]
15
+
16
+ # Data params
17
+ data_folder: /network/tmp1/subakany/wsj0-mix/3speakers # wsj2mix or wsj3mix
18
+ experiment_name: 17-augment-3mix-dm
19
+ output_folder: results/17-augment-3mix-dm/1234
20
+ train_log: results/17-augment-3mix-dm/1234/train_log.txt
21
+ save_folder: results/17-augment-3mix-dm/1234/save
22
+ train_data: results/17-augment-3mix-dm/1234/save/wsj_tr.csv
23
+ valid_data: results/17-augment-3mix-dm/1234/save/wsj_cv.csv
24
+ test_data: results/17-augment-3mix-dm/1234/save/wsj_tt.csv
25
+ wsj0_tr: /network/tmp1/subakany/wsj0-processed/si_tr_s/
26
+ skip_prep: false
27
+
28
+ # Experiment params
29
+ auto_mix_prec: true
30
+ test_only: true
31
+ num_spks: 3 # set to 3 for wsj0-3mix
32
+ progressbar: true
33
+ save_audio: false # Save estimated sources on disk
34
+ sample_rate: 8000
35
+
36
+ # Training parameters
37
+ N_epochs: 200
38
+ batch_size: 1
39
+ lr: 0.00015
40
+ clip_grad_norm: 5
41
+ loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
42
+ # if True, the training sequences are cut to a specified length
43
+ limit_training_signal_len: false
44
+ # this is the length of sequences if we choose to limit
45
+ # the signal length of training sequences
46
+ training_signal_len: 32000
47
+ dynamic_mixing: regular
48
+
49
+ # Augment parameters
50
+ use_wavedrop: false
51
+ use_speedperturb: true
52
+ use_speedperturb_sameforeachsource: false
53
+ use_rand_shift: false
54
+ min_shift: -8000
55
+ max_shift: 8000
56
+
57
+ # Neural parameters
58
+ N_encoder_out: 256
59
+ out_channels: 256
60
+ kernel_size: 16
61
+ kernel_stride: 8
62
+
63
+ threshold_byloss: true
64
+ threshold: -30
65
+
66
+ # Dataloader options
67
+ dataloader_opts:
68
+ batch_size: 1
69
+ num_workers: 3
70
+
71
+ speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
72
+ perturb_prob: 1.0
73
+ drop_freq_prob: 0.0
74
+ drop_chunk_prob: 0.0
75
+ sample_rate: 8000
76
+ speeds: [95, 100, 105]
77
+
78
+ wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
79
+ perturb_prob: 0.0
80
+ drop_freq_prob: 1.0
81
+ drop_chunk_prob: 1.0
82
+ sample_rate: 8000
83
+
84
+
85
+ Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
86
+ kernel_size: 16
87
+ out_channels: 256
88
+
89
+
90
+ SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
91
+ num_layers: 8
92
+ d_model: 256
93
+ nhead: 8
94
+ d_ffn: 1024
95
+ dropout: 0
96
+ use_positional_encoding: true
97
+ norm_before: true
98
+
99
+ SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
100
+ num_layers: 8
101
+ d_model: 256
102
+ nhead: 8
103
+ d_ffn: 1024
104
+ dropout: 0
105
+ use_positional_encoding: true
106
+ norm_before: true
107
+
108
+ MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
109
+
110
+ num_spks: 3
111
+ in_channels: 256
112
+ out_channels: 256
113
+ num_layers: 2
114
+ K: 250
115
+ intra_model: *id001
116
+ inter_model: *id002
117
+ norm: ln
118
+ linear_layer_after_inter_intra: false
119
+ skip_around_intra: true
120
+
121
+ Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
122
+ in_channels: 256
123
+ out_channels: 1
124
+ kernel_size: 16
125
+ stride: 8
126
+ bias: false
127
+
128
+ optimizer: !name:torch.optim.Adam
129
+ lr: 0.00015
130
+ weight_decay: 0
131
+
132
+ loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
133
+
134
+ lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
135
+
136
+ factor: 0.5
137
+ patience: 2
138
+ dont_halve_until_epoch: 100
139
+
140
+ epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
141
+ limit: 200
142
+
143
+ modules:
144
+ encoder: *id003
145
+ decoder: *id004
146
+ masknet: *id005
147
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
148
+ checkpoints_dir: results/17-augment-3mix-dm/1234/save
149
+ recoverables:
150
+ encoder: *id003
151
+ decoder: *id004
152
+ masknet: *id005
153
+ counter: *id006
154
+ lr_scheduler: *id007
155
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
156
+ save_file: results/17-augment-3mix-dm/1234/train_log.txt
157
+
158
+
159
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
160
+ loadables:
161
+ masknet: !ref <MaskNet>
162
+ encoder: !ref <Encoder>
163
+ decoder: !ref <Decoder>