# File size: 4,365 Bytes
# 98d299b
# Generated 2021-02-26 from:
# /scratch/csubakan/speechbrain_new/recipes/WSJ2Mix/separation/yamls/dptransformer78.yaml
# yamllint disable
# ################################
# Model: SepFormer for source separation
# https://arxiv.org/abs/2010.13154
#
# Dataset : WSJ0-mix
# ################################
# Basic parameters
# The seed has to be set at the top of the file, before any object with
# parameters is instantiated.
#
seed: 1234
# `!apply` calls torch.manual_seed while the file is loaded. It references
# <seed> instead of repeating the literal, so overriding `seed` also changes
# the value that actually seeds the RNG.
__set_seed: !apply:torch.manual_seed [!ref <seed>]
# Data params
# data_folder and wsj0_tr are absolute scratch paths.
# NOTE(review): presumably machine-specific; override them when running elsewhere.
data_folder: /localscratch/csubakan.62709298.0/wsj0-mix/2speakers  # wsj2mix or wsj3mix
experiment_name: 78-speedchange-dynamicmix-hardcodegaussian
# All output locations are derived from experiment_name and seed, so changing
# either one moves the log, the checkpoints and the CSV manifests together.
# The resolved values are identical to the previously hard-coded paths.
output_folder: !ref results/<experiment_name>/<seed>
train_log: !ref <output_folder>/train_log.txt
save_folder: !ref <output_folder>/save
train_data: !ref <save_folder>/wsj_tr.csv
valid_data: !ref <save_folder>/wsj_cv.csv
test_data: !ref <save_folder>/wsj_tt.csv
wsj0_tr: /localscratch/csubakan.62709298.0/wsj0-processed/si_tr_s/
# Experiment params
# Run-level switches. NOTE(review): these are plain values; how each one is
# consumed is decided by the training script, which is not visible in this file.
auto_mix_prec: true
test_only: false
num_spks: 2 # set to 3 for wsj0-3mix
progressbar: true
save_audio: false # Save estimated sources on disk
# The augmentation objects defined further down are configured with this same
# sample rate. NOTE(review): presumably in Hz — confirm.
sample_rate: 8000
# Training parameters
# The epoch_counter object further down is configured with the same limit.
N_epochs: 200
batch_size: 1
# The Adam optimizer further down is configured with the same learning rate.
lr: 0.00015
clip_grad_norm: 5
loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
# If true, the training sequences are cut to a specified length
limit_training_signal_len: false
# Length of the training sequences when limit_training_signal_len is enabled.
# NOTE(review): presumably counted in samples (128000 = 16 s at 8000) — confirm.
training_signal_len: 128000
# NOTE(review): `regular` is a mode name interpreted by the training script;
# check there for the accepted values.
dynamic_mixing: regular
# Augment parameters
# Flags selecting which augmentations are active.
# NOTE(review): presumably use_wavedrop / use_speedperturb gate the `wavedrop`
# and `speedperturb` objects defined further down — confirm in the training script.
use_wavedrop: false
use_speedperturb: true
use_speedperturb_sameforeachsource: false
use_rand_shift: false
# Bounds of the random shift (only relevant when use_rand_shift is true).
# NOTE(review): presumably counted in samples — confirm.
min_shift: -8000
max_shift: 8000
# Neural parameters
# The Encoder, MaskNet and Decoder objects further down are configured with
# these same sizes (256 channels, kernel size 16, stride 8).
N_encoder_out: 256
out_channels: 256
kernel_size: 16
kernel_stride: 8
# NOTE(review): threshold_byloss / threshold look like a loss-based filter on
# training examples; the exact rule lives in the training script — confirm.
threshold_byloss: true
threshold: -30
# Dataloader options
# Both settings belong inside the dataloader_opts mapping. Left at column 0,
# `batch_size` duplicated the top-level key and dataloader_opts was null.
dataloader_opts:
  # Reuses the top-level batch size so the two cannot drift apart.
  batch_size: !ref <batch_size>
  num_workers: 3
# Speed-perturbation augmenter. `!new` instantiates the class with the nested
# keys as keyword arguments, so they must be indented under `speedperturb`.
# Perturbation is always applied (probability 1.0); both drop probabilities
# are 0.0.
speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  perturb_prob: 1.0
  drop_freq_prob: 0.0
  drop_chunk_prob: 0.0
  sample_rate: !ref <sample_rate>
  # NOTE(review): presumably percentages of the original speed — confirm.
  speeds: [95, 100, 105]
# Drop-only augmenter: the same class as `speedperturb`, but speed perturbation
# is off (probability 0.0) and both drop probabilities are 1.0.
# The arguments are nested so `!new` receives them as keyword arguments.
wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  perturb_prob: 0.0
  drop_freq_prob: 1.0
  drop_chunk_prob: 1.0
  sample_rate: !ref <sample_rate>
# Encoder object, anchored as id003 so that `modules` and `checkpointer`
# alias this one instance. Its arguments are nested under the key and point
# at the top-level sizes (kernel_size 16, N_encoder_out 256).
Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
  kernel_size: !ref <kernel_size>
  out_channels: !ref <N_encoder_out>
# Transformer block passed to MaskNet as `intra_model` (via anchor id001).
# The arguments are nested so `!new` receives them as keyword arguments.
SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  num_layers: 8
  # NOTE(review): equals the top-level out_channels (256); presumably the two
  # must stay equal — confirm before changing either.
  d_model: 256
  nhead: 8
  d_ffn: 1024
  dropout: 0
  use_positional_encoding: true
  norm_before: true
# Transformer block passed to MaskNet as `inter_model` (via anchor id002).
# It has the same settings as the intra block, but is a separate instance.
SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  num_layers: 8
  # NOTE(review): equals the top-level out_channels (256); presumably the two
  # must stay equal — confirm before changing either.
  d_model: 256
  nhead: 8
  d_ffn: 1024
  dropout: 0
  use_positional_encoding: true
  norm_before: true
# Dual-path mask network, anchored as id005 for `modules` and `checkpointer`.
# It wraps the intra and inter transformer blocks defined above.
# num_spks and the channel sizes reference the top-level values, so switching
# num_spks (2 -> 3 for wsj0-3mix) reaches the model itself.
MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
  num_spks: !ref <num_spks>
  in_channels: !ref <N_encoder_out>
  out_channels: !ref <out_channels>
  num_layers: 2
  K: 250
  intra_model: *id001
  inter_model: *id002
  norm: ln
  linear_layer_after_inter_intra: false
  skip_around_intra: true
# Decoder object, anchored as id004 for `modules` and `checkpointer`.
# Kernel size, stride and input channels reference the same top-level values
# the Encoder uses; the output has a single channel.
Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
  in_channels: !ref <N_encoder_out>
  out_channels: 1
  kernel_size: !ref <kernel_size>
  stride: !ref <kernel_stride>
  bias: false
# Adam optimizer. `!name` does not instantiate it here; it yields a callable
# with the nested keyword arguments pre-filled.
optimizer: !name:torch.optim.Adam
  # Uses the top-level learning rate so an override of `lr` takes effect.
  lr: !ref <lr>
  weight_decay: 0
# `!name` yields the function itself (it is not called at load time), so the
# training script can invoke it with its own arguments.
loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
# Plateau-based learning-rate scheduler, anchored as id007 so the checkpointer
# can save and restore it. The arguments are nested so `!new` receives them
# as keyword arguments.
lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
  factor: 0.5
  patience: 4
  dont_halve_until_epoch: 100
# Epoch counter, anchored as id006 so the checkpointer can save and restore it.
# Its limit references N_epochs instead of repeating the literal 200.
epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: !ref <N_epochs>
# Mapping from module name to the model objects defined above.
# The aliases point at the anchored instances; no new objects are created.
modules:
  encoder: *id003
  decoder: *id004
  masknet: *id005
# Checkpointer writing into save_folder. `recoverables` is a nested mapping
# listing every object it tracks: the three model parts, the epoch counter
# and the learning-rate scheduler.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: !ref <save_folder>
  recoverables:
    encoder: *id003
    decoder: *id004
    masknet: *id005
    counter: *id006
    lr_scheduler: *id007
# File logger. It references train_log instead of repeating the path, so
# moving the output folder also moves the log file.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: !ref <train_log>
# Pretrainer whose `loadables` are the same three model objects defined above,
# referenced by key. `loadables` is a nested mapping passed as one keyword
# argument.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    masknet: !ref <MaskNet>
    encoder: !ref <Encoder>
    decoder: !ref <Decoder>