subakany committed on
Commit
a8e47f9
1 Parent(s): f0ce3b4

refactoring file organization

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
brain.ckpt CHANGED
@@ -1,2 +1,3 @@
1
- avg_train_loss: 0.0
2
- step: 0
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9e24193f36931b7f57932532efbdcf64971f42732383ba6808825f77db258f6
3
+ size 28
classifier_enc.ckpt CHANGED
Binary files a/classifier_enc.ckpt and b/classifier_enc.ckpt differ
 
classifier_out.ckpt CHANGED
Binary files a/classifier_out.ckpt and b/classifier_out.ckpt differ
 
counter.ckpt CHANGED
@@ -1 +1,3 @@
1
- 14
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8527a891e224136950ff32ca212b45bc93f69fbb801c3b1ebedac52775f99e61
3
+ size 2
optimizer.ckpt CHANGED
Binary files a/optimizer.ckpt and b/optimizer.ckpt differ
 
sepformer1/decoder.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:308586da02de3c86af1a597eabf7613d96c12d558bc1ef82274a286ab18281c2
3
- size 17272
 
 
 
 
sepformer1/encoder.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ba8d001c2b3fbd7cd034271448c0b51e6e7e4f33e9c0f6fc2fe80414ce9c42c
3
- size 17272
 
 
 
 
sepformer1/hyperparams.yaml DELETED
@@ -1,184 +0,0 @@
1
- # Generated 2021-09-23 from:
2
- # /home/mila/s/subakany/speechbrain_new/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml
3
- # yamllint disable
4
- # ################################
5
- # Model: SepFormer for source separation
6
- # https://arxiv.org/abs/2010.13154
7
- #
8
- # Dataset : WHAMR!
9
- # ################################
10
- # Basic parameters
11
- # Seed needs to be set at top of yaml, before objects with parameters are made
12
- #
13
- seed: 3
14
- __set_seed: !apply:torch.manual_seed [3]
15
-
16
- # Data params
17
-
18
- # the data folder for the wham dataset
19
- # data_folder needs to follow the format: /yourpath/whamr.
20
- # make sure to use the name whamr at your top folder for the dataset!
21
- data_folder: /network/tmp1/subakany/whamr
22
-
23
- # the path for wsj0/si_tr_s/ folder -- only needed if dynamic mixing is used
24
- # e.g. /yourpath/wsj0-processed/si_tr_s/
25
- # you need to convert the original wsj0 to 8k
26
- # you can do this conversion with the script ../meta/preprocess_dynamic_mixing.py
27
- base_folder_dm: /network/tmp1/subakany/wsj0-processed/si_tr_s/
28
-
29
- experiment_name: sepformer-whamr
30
- output_folder: results/sepformer-whamr/3
31
- train_log: results/sepformer-whamr/3/train_log.txt
32
- save_folder: results/sepformer-whamr/3/save
33
-
34
- # the file names should start with whamr instead of whamorg
35
- train_data: results/sepformer-whamr/3/save/whamr_tr.csv
36
- valid_data: results/sepformer-whamr/3/save/whamr_cv.csv
37
- test_data: results/sepformer-whamr/3/save/whamr_tt.csv
38
- skip_prep: false
39
-
40
- # Experiment params
41
- auto_mix_prec: true # Set it to True for mixed precision
42
- test_only: false
43
- num_spks: 2 # set to 3 for wsj0-3mix
44
- progressbar: true
45
- save_audio: false # Save estimated sources on disk
46
- sample_rate: 8000
47
-
48
- # Training parameters
49
- N_epochs: 200
50
- batch_size: 1
51
- lr: 0.00015
52
- clip_grad_norm: 5
53
- loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
54
- # if True, the training sequences are cut to a specified length
55
- limit_training_signal_len: false
56
- # this is the length of sequences if we choose to limit
57
- # the signal length of training sequences
58
- training_signal_len: 32000000
59
-
60
- # Set it to True to dynamically create mixtures at training time
61
- dynamic_mixing: true
62
-
63
- # Parameters for data augmentation
64
-
65
- # rir_path variable points to the directory of the room impulse responses
66
- # e.g. /miniscratch/subakany/rir_wavs
67
- # If the path does not exist, it is created automatically.
68
- rir_path: /miniscratch/subakany/whamr_rirs_wav
69
-
70
- use_wavedrop: false
71
- use_speedperturb: true
72
- use_speedperturb_sameforeachsource: false
73
- use_rand_shift: false
74
- min_shift: -8000
75
- max_shift: 8000
76
-
77
- speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
78
- perturb_prob: 1.0
79
- drop_freq_prob: 0.0
80
- drop_chunk_prob: 0.0
81
- sample_rate: 8000
82
- speeds: [95, 100, 105]
83
-
84
- wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
85
- perturb_prob: 0.0
86
- drop_freq_prob: 1.0
87
- drop_chunk_prob: 1.0
88
- sample_rate: 8000
89
-
90
- # loss thresholding -- this thresholds the training loss
91
- threshold_byloss: true
92
- threshold: -30
93
-
94
- # Encoder parameters
95
- N_encoder_out: 256
96
- out_channels: 256
97
- kernel_size: 16
98
- kernel_stride: 8
99
-
100
- # Dataloader options
101
- dataloader_opts:
102
- batch_size: 1
103
- num_workers: 3
104
-
105
- # Specifying the network
106
- Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
107
- kernel_size: 16
108
- out_channels: 256
109
-
110
-
111
- SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
112
- num_layers: 8
113
- d_model: 256
114
- nhead: 8
115
- d_ffn: 1024
116
- dropout: 0
117
- use_positional_encoding: true
118
- norm_before: true
119
-
120
- SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
121
- num_layers: 8
122
- d_model: 256
123
- nhead: 8
124
- d_ffn: 1024
125
- dropout: 0
126
- use_positional_encoding: true
127
- norm_before: true
128
-
129
- MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
130
-
131
- num_spks: 2
132
- in_channels: 256
133
- out_channels: 256
134
- num_layers: 2
135
- K: 250
136
- intra_model: *id001
137
- inter_model: *id002
138
- norm: ln
139
- linear_layer_after_inter_intra: false
140
- skip_around_intra: true
141
-
142
- Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
143
- in_channels: 256
144
- out_channels: 1
145
- kernel_size: 16
146
- stride: 8
147
- bias: false
148
-
149
- optimizer: !name:torch.optim.Adam
150
- lr: 0.00015
151
- weight_decay: 0
152
-
153
- loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
154
-
155
- lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
156
-
157
- factor: 0.5
158
- patience: 2
159
- dont_halve_until_epoch: 85
160
-
161
- epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
162
- limit: 200
163
-
164
- modules:
165
- encoder: *id003
166
- decoder: *id004
167
- masknet: *id005
168
- save_all_checkpoints: true
169
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
170
- checkpoints_dir: results/sepformer-whamr/3/save
171
- recoverables:
172
- encoder: *id003
173
- decoder: *id004
174
- masknet: *id005
175
- counter: *id006
176
- lr_scheduler: *id007
177
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
178
- save_file: results/sepformer-whamr/3/train_log.txt
179
-
180
- pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
181
- loadables:
182
- encoder: !ref <Encoder>
183
- masknet: !ref <MaskNet>
184
- decoder: !ref <Decoder>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
sepformer1/masknet.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e5772c0ed40c85a2ea12fa9c2bbf6fe5a247bec060d13e63e97db085340bb4c
3
- size 113112646