cemsubakan commited on
Commit
edf25e2
1 Parent(s): 5ced036

pushing hyperparams

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +155 -0
hyperparams.yaml ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Generated 2021-02-26 from:
# /scratch/csubakan/speechbrain_new/recipes/WSJ2Mix/separation/yamls/dptransformer78.yaml
# yamllint disable
# ################################
# Model: SepFormer for source separation
# https://arxiv.org/abs/2010.13154
#
# Dataset : WSJ0-mix
# ################################
# Basic parameters
# Seed needs to be set at top of yaml, before objects with parameters are made
#
seed: 1234
__set_seed: !apply:torch.manual_seed [1234]

# Data params
data_folder: /localscratch/csubakan.62709298.0/wsj0-mix/2speakers # wsj2mix or wsj3mix
experiment_name: 78-speedchange-dynamicmix-hardcodegaussian
output_folder: results/78-speedchange-dynamicmix-hardcodegaussian/1234
train_log: results/78-speedchange-dynamicmix-hardcodegaussian/1234/train_log.txt
save_folder: results/78-speedchange-dynamicmix-hardcodegaussian/1234/save
train_data: results/78-speedchange-dynamicmix-hardcodegaussian/1234/save/wsj_tr.csv
valid_data: results/78-speedchange-dynamicmix-hardcodegaussian/1234/save/wsj_cv.csv
test_data: results/78-speedchange-dynamicmix-hardcodegaussian/1234/save/wsj_tt.csv
wsj0_tr: /localscratch/csubakan.62709298.0/wsj0-processed/si_tr_s/

# Experiment params
auto_mix_prec: true
test_only: false
num_spks: 2 # set to 3 for wsj0-3mix
progressbar: true
save_audio: false # Save estimated sources on disk
sample_rate: 8000

# Training parameters
N_epochs: 200
batch_size: 1
lr: 0.00015
clip_grad_norm: 5
loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
# if True, the training sequences are cut to a specified length
limit_training_signal_len: false
# this is the length of sequences if we choose to limit
# the signal length of training sequences
training_signal_len: 128000
dynamic_mixing: regular

# Augment parameters
use_wavedrop: false
use_speedperturb: true
use_speedperturb_sameforeachsource: false
use_rand_shift: false
min_shift: -8000
max_shift: 8000

# Neural parameters
N_encoder_out: 256
out_channels: 256
kernel_size: 16
kernel_stride: 8

threshold_byloss: true
threshold: -30

# Dataloader options
dataloader_opts:
    batch_size: 1
    num_workers: 3

# Speed perturbation only (drop probabilities zeroed out)
speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
    perturb_prob: 1.0
    drop_freq_prob: 0.0
    drop_chunk_prob: 0.0
    sample_rate: 8000
    speeds: [95, 100, 105]

# Frequency/chunk dropping only (speed perturbation zeroed out)
wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
    perturb_prob: 0.0
    drop_freq_prob: 1.0
    drop_chunk_prob: 1.0
    sample_rate: 8000


Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
    kernel_size: 16
    out_channels: 256


# Intra-chunk transformer block of the dual-path model
SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
    num_layers: 8
    d_model: 256
    nhead: 8
    d_ffn: 1024
    dropout: 0
    use_positional_encoding: true
    norm_before: true

# Inter-chunk transformer block of the dual-path model
SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
    num_layers: 8
    d_model: 256
    nhead: 8
    d_ffn: 1024
    dropout: 0
    use_positional_encoding: true
    norm_before: true

MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model

    num_spks: 2
    in_channels: 256
    out_channels: 256
    num_layers: 2
    K: 250
    intra_model: *id001
    inter_model: *id002
    norm: ln
    linear_layer_after_inter_intra: false
    skip_around_intra: true

Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
    in_channels: 256
    out_channels: 1
    kernel_size: 16
    stride: 8
    bias: false

optimizer: !name:torch.optim.Adam
    lr: 0.00015
    weight_decay: 0

loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper

lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau

    factor: 0.5
    patience: 4
    dont_halve_until_epoch: 100

epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: 200

modules:
    encoder: *id003
    decoder: *id004
    masknet: *id005
# NOTE(review): nesting of `recoverables` under `checkpointer` is reconstructed
# from SpeechBrain's Checkpointer convention — the extracted source had lost
# all indentation; verify against the original generated file.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: results/78-speedchange-dynamicmix-hardcodegaussian/1234/save
    recoverables:
        encoder: *id003
        decoder: *id004
        masknet: *id005
        counter: *id006
        lr_scheduler: *id007
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
    save_file: results/78-speedchange-dynamicmix-hardcodegaussian/1234/train_log.txt