# PIQ-ESC50 / hyperparams.yaml
# cemsubakan's picture
# Update hyperparams.yaml
# 25df5de verified
# #################################
# The recipe for training PIQ on the ESC50 dataset.
#
# Authors:
# * Cem Subakan 2022, 2023
# * Francesco Paissan 2022, 2023
# (based on the SpeechBrain UrbanSound8k recipe)
# #################################
# Global settings. How each flag is consumed is not visible in this file, so
# the notes marked NOTE(review) are assumptions to be confirmed.
# Sampling rate used by the STFT, Filterbank and ISTFT definitions below.
sample_rate: 16000
# NOTE(review): presumably toggles vector quantization in the interpreter
# (psi_model) - confirm against the consuming code.
use_vq: true
# NOTE(review): looks like the weight of a reconstruction loss term - confirm.
rec_loss_coef: 1
use_mask_output: true
# NOTE(review): presumably a threshold applied to the interpretation mask
# when use_mask_output is true - confirm.
mask_th: 0.35
device: cpu
# Feature parameters
# Number of mel bands; same value as the Filterbank definition below.
n_mels: 80
# Number of classes
# Same value as the classifier's out_neurons below.
out_n_neurons: 50
# Embedding network. `dim` is a constructor argument of Conv2dEncoder_v2, so
# it is nested under the !new: tag rather than living at the top level.
# embedding_model: !new:custom_models.Conv2dEncoder_v2
embedding_model: !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
  dim: 256
# Classification head. Its arguments are nested under the !new: tag.
# input_size equals the embedding_model `dim` (256); out_neurons follows the
# top-level out_n_neurons so the class count is defined in one place.
classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 256
  out_neurons: !ref <out_n_neurons>
  lin_blocks: 1
# Interpretation hyperparams
# Same value as the K argument of psi_model below.
K: 1024
# pre-processing
# FFT size; same value as in the STFT and Filterbank definitions below.
n_fft: 1024
spec_mag_power: 0.5
# NOTE(review): hop_length / win_length are presumably in milliseconds
# (SpeechBrain STFT convention) - confirm. 23.2199 ms / 11.6099 ms correspond
# to 1024 / 512 samples at 44.1 kHz, but only ~371 / ~186 samples at the
# 16 kHz sample_rate set in this file; verify this matches the checkpoints.
hop_length: 11.6099
win_length: 23.2199
# STFT front-end. Arguments are nested under the !new: tag and reference the
# top-level pre-processing keys, so each value is defined exactly once.
compute_stft: !new:speechbrain.processing.features.STFT
  n_fft: !ref <n_fft>
  hop_length: !ref <hop_length>
  win_length: !ref <win_length>
  sample_rate: !ref <sample_rate>
# Mel filterbank. Arguments are nested under the !new: tag and reference the
# top-level n_mels / n_fft / sample_rate keys instead of repeating them.
compute_fbank: !new:speechbrain.processing.features.Filterbank
  n_mels: !ref <n_mels>
  n_fft: !ref <n_fft>
  sample_rate: !ref <sample_rate>
# Inverse STFT. It references the same top-level hop_length / win_length /
# sample_rate keys as compute_stft, so the two transforms cannot drift apart.
compute_istft: !new:speechbrain.processing.features.ISTFT
  sample_rate: !ref <sample_rate>
  hop_length: !ref <hop_length>
  win_length: !ref <win_length>
# Label encoder, constructed without arguments. The pretrainer section maps
# `label_encoder` to a label_encoder.txt file; presumably its contents are
# loaded from there - confirm.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
# Interpreter (PSI) network. Arguments are nested under the !new: tag.
# `dim` equals the embedding_model dim (256); `K` follows the top-level K.
psi_model: !new:speechbrain.lobes.models.PIQ.VectorQuantizedPSI_Audio
  dim: 256
  K: !ref <K>
  shared_keys: 0
  activate_class_partitioning: true
  use_adapter: true
  adapter_reduce_dim: true
# Name -> object mapping of the components defined above. Entries must be
# nested under `modules`; at the top level they would leave `modules` empty
# and redefine keys such as `embedding_model` as references to themselves.
# Note that psi_model is exposed under the shorter name `psi`.
modules:
  compute_stft: !ref <compute_stft>
  compute_fbank: !ref <compute_fbank>
  compute_istft: !ref <compute_istft>
  psi: !ref <psi_model>
  embedding_model: !ref <embedding_model>
  classifier: !ref <classifier>
# Pretrainer: `loadables` maps a name to the object to populate, and `paths`
# maps the same name to the file it is loaded from. Both mappings are
# arguments of the Pretrainer and are nested under the !new: tag.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    embedding_model: !ref <embedding_model>
    classifier: !ref <classifier>
    psi: !ref <psi_model>
    label_encoder: !ref <label_encoder>
  paths:
    embedding_model: speechbrain/PIQ-ESC50/embedding_modelft.ckpt
    classifier: speechbrain/PIQ-ESC50/classifier.ckpt
    psi: speechbrain/PIQ-ESC50/psi_model.ckpt
    # NOTE(review): the label encoder comes from a different location
    # (cnn14-esc50) than the three checkpoints above - confirm intentional.
    label_encoder: speechbrain/cnn14-esc50/label_encoder.txt