cemsubakan committed on
Commit
6ceca85
1 Parent(s): ac23d45

Update hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +1 -119
hyperparams.yaml CHANGED
@@ -1,92 +1,10 @@
1
- # Generated 2022-06-30 from:
2
- # /home/cem/Dropbox/speechbrain-1/recipes/WHAMandWHAMR/enhancement/yamls/sepformer-wham-16k.yaml
3
- # yamllint disable
4
  # ################################
5
- # Model: SepFormer for source separation
6
- # https://arxiv.org/abs/2010.13154
7
- #
8
  # Dataset : WHAM!
9
  # ################################
10
- # Basic parameters
11
- # Seed needs to be set at top of yaml, before objects with parameters are made
12
- #
13
- seed: 1234
14
- __set_seed: !apply:torch.manual_seed [1234]
15
 
16
- # Data params
17
-
18
- # the data folder for the wham dataset
19
- # needs to end with wham_original for the wham dataset
20
- # respecting this convention affects the code functionality
21
- data_folder: /data2/wham_original/
22
- task: enhancement
23
- dereverberate: false
24
-
25
- # the path for wsj0/si_tr_s/ folder -- only needed if dynamic mixing is used
26
- # e.g. /yourpath/wsj0-processed/si_tr_s/
27
- ## you need to convert the original wsj0 to 8k
28
- # you can do this conversion with ../meta/preprocess_dynamic_mixing.py
29
- base_folder_dm: /yourpath/wsj0-processed/si_tr_s/
30
-
31
- experiment_name: sepformer-wham-16k
32
- output_folder: results/sepformer-wham-16k/1234
33
- train_log: results/sepformer-wham-16k/1234/train_log.txt
34
- save_folder: results/sepformer-wham-16k/1234/save
35
-
36
- # the file names should start with whamr instead of whamorg
37
- train_data: results/sepformer-wham-16k/1234/save/whamorg_tr.csv
38
- valid_data: results/sepformer-wham-16k/1234/save/whamorg_cv.csv
39
- test_data: results/sepformer-wham-16k/1234/save/whamorg_tt.csv
40
- skip_prep: false
41
-
42
-
43
- # Experiment params
44
- auto_mix_prec: true # Set it to True for mixed precision
45
- test_only: true
46
  num_spks: 1 # set to 3 for wsj0-3mix
47
- noprogressbar: false
48
- save_audio: true # Save estimated sources on disk
49
  sample_rate: 16000
50
- n_audio_to_save: 20
51
-
52
- # Training parameters
53
- N_epochs: 200
54
- batch_size: 1
55
- lr: 0.00015
56
- clip_grad_norm: 5
57
- loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
58
- # if True, the training sequences are cut to a specified length
59
- limit_training_signal_len: true
60
- # this is the length of sequences if we choose to limit
61
- # the signal length of training sequences
62
- training_signal_len: 64000
63
-
64
- # Set it to True to dynamically create mixtures at training time
65
- dynamic_mixing: false
66
-
67
- # Parameters for data augmentation
68
- use_wavedrop: false
69
- use_speedperturb: true
70
- use_rand_shift: false
71
- min_shift: -8000
72
- max_shift: 8000
73
-
74
- speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
75
- perturb_prob: 1.0
76
- drop_freq_prob: 0.0
77
- drop_chunk_prob: 0.0
78
- sample_rate: 16000
79
- speeds: [95, 100, 105]
80
-
81
- wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
82
- perturb_prob: 0.0
83
- drop_freq_prob: 1.0
84
- drop_chunk_prob: 1.0
85
- sample_rate: 16000
86
-
87
- # loss thresholding -- this thresholds the training loss
88
- threshold_byloss: true
89
- threshold: -30
90
 
91
  # Encoder parameters
92
  N_encoder_out: 256
@@ -94,21 +12,11 @@ out_channels: 256
94
  kernel_size: 16
95
  kernel_stride: 8
96
 
97
- # Dataloader options
98
- dataloader_opts:
99
- batch_size: 1
100
- num_workers: 3
101
-
102
- dataloader_opts_valid:
103
- batch_size: 1
104
- num_workers: 3
105
-
106
  # Specifying the network
107
  Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
108
  kernel_size: 16
109
  out_channels: 256
110
 
111
-
112
  SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
113
  num_layers: 8
114
  d_model: 256
@@ -128,7 +36,6 @@ SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
128
  norm_before: true
129
 
130
  MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
131
-
132
  num_spks: 1
133
  in_channels: 256
134
  out_channels: 256
@@ -147,35 +54,10 @@ Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
147
  stride: 8
148
  bias: false
149
 
150
- optimizer: !name:torch.optim.Adam
151
- lr: 0.00015
152
- weight_decay: 0
153
-
154
- loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
155
-
156
- lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
157
-
158
- factor: 0.5
159
- patience: 2
160
- dont_halve_until_epoch: 65
161
-
162
- epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
163
- limit: 200
164
-
165
  modules:
166
  encoder: *id003
167
  decoder: *id004
168
  masknet: *id005
169
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
170
- checkpoints_dir: results/sepformer-wham-16k/1234/save
171
- recoverables:
172
- encoder: *id003
173
- decoder: *id004
174
- masknet: *id005
175
- counter: *id006
176
- lr_scheduler: *id007
177
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
178
- save_file: results/sepformer-wham-16k/1234/train_log.txt
179
 
180
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
181
  loadables:
 
 
 
 
1
  # ################################
2
+ # Model: Pretrained SepFormer for speech enhancement
 
 
3
  # Dataset : WHAM!
4
  # ################################
 
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  num_spks: 1 # set to 3 for wsj0-3mix
 
 
7
  sample_rate: 16000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Encoder parameters
10
  N_encoder_out: 256
 
12
  kernel_size: 16
13
  kernel_stride: 8
14
 
 
 
 
 
 
 
 
 
 
15
  # Specifying the network
16
  Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
17
  kernel_size: 16
18
  out_channels: 256
19
 
 
20
  SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
21
  num_layers: 8
22
  d_model: 256
 
36
  norm_before: true
37
 
38
  MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
 
39
  num_spks: 1
40
  in_channels: 256
41
  out_channels: 256
 
54
  stride: 8
55
  bias: false
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  modules:
58
  encoder: *id003
59
  decoder: *id004
60
  masknet: *id005
 
 
 
 
 
 
 
 
 
 
61
 
62
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
63
  loadables: