# ################################
# Model: Neural SI-SNR Estimator with Pool training strategy
#        (https://arxiv.org/pdf/2110.10812.pdf)
# Dataset: LibriMix and WHAMR!
#
# HyperPyYAML file: `!new:` instantiates the named class with the nested
# mapping as keyword arguments, and `!ref <key>` points at another
# top-level entry of this file.
# ################################

sample_rate: 8000

# Specifying the network
# NOTE(review): snrmin/snrmax look like the SNR range (presumably in dB) that
# the estimator output is mapped onto by the consuming code -- confirm there.
snrmin: 0
snrmax: 10
use_snr_compression: true
separation_norm_type: stnorm

# NOTE(review): latent_dim and n_inp are not referenced anywhere in this file;
# the layer sizes below are spelled out literally. Keep them in sync by hand.
latent_dim: 128
n_inp: 256

# Convolutional front-end: five Conv1d layers (kernel 4, 128 output channels)
# with a ReLU after each of the first four, followed by statistics pooling.
# The first layer uses stride 1; the remaining four use stride 2.
encoder: &id006 !new:speechbrain.nnet.containers.Sequential
  # assumes (batch, channels=2, time) layout given skip_transpose -- TODO confirm
  input_shape: [!!null '', 2, !!null '']
  cnn1: !new:speechbrain.nnet.CNN.Conv1d
    in_channels: 2
    kernel_size: 4
    out_channels: 128
    stride: 1
    skip_transpose: true
    padding: valid
  relu1: !new:torch.nn.ReLU
  cnn2: !new:speechbrain.nnet.CNN.Conv1d
    in_channels: 128
    kernel_size: 4
    out_channels: 128
    stride: 2
    skip_transpose: true
    padding: valid
  relu2: !new:torch.nn.ReLU
  cnn3: !new:speechbrain.nnet.CNN.Conv1d
    in_channels: 128
    kernel_size: 4
    out_channels: 128
    stride: 2
    skip_transpose: true
    padding: valid
  relu3: !new:torch.nn.ReLU
  cnn4: !new:speechbrain.nnet.CNN.Conv1d
    in_channels: 128
    kernel_size: 4
    out_channels: 128
    stride: 2
    skip_transpose: true
    padding: valid
  relu4: !new:torch.nn.ReLU
  cnn5: !new:speechbrain.nnet.CNN.Conv1d
    in_channels: 128
    kernel_size: 4
    out_channels: 128
    stride: 2
    skip_transpose: true
    padding: valid
  # presumably yields 256 features (mean + std over 128 channels), matching
  # the 256-wide input of encoder_out below -- verify against StatisticsPooling
  stat_pooling: !new:speechbrain.nnet.pooling.StatisticsPooling

# Regression head: 256 -> 256 -> 1 with a final Sigmoid, so the raw output
# lies in (0, 1).
encoder_out: &id007 !new:speechbrain.nnet.containers.Sequential
  input_shape: [!!null '', 256]
  layer1: !new:speechbrain.nnet.linear.Linear
    input_size: 256
    n_neurons: 256
  relu: !new:torch.nn.ReLU
  layer2: !new:speechbrain.nnet.linear.Linear
    input_size: 256
    n_neurons: 1
  sigm: !new:torch.nn.Sigmoid

# Aliases (not copies) of the two networks defined above.
modules:
  encoder: *id006
  encoder_out: *id007

# Each loadable must reference the object built above; a bare `!ref` with no
# `<key>` target resolves to nothing useful, so the targets are explicit.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    encoder: !ref <encoder>
    encoder_out: !ref <encoder_out>