model: | |
_target_: pyannote.audio.models.embedding.WeSpeakerResNet293 | |
sample_rate: 16000 | |
num_channels: 1 | |
num_mel_bins: 80 | |
frame_length: 25 | |
frame_shift: 10 | |
dither: 1.0 | |
window_type: hamming | |
use_energy: false | |
model_args: | |
embed_dim: 256 | |
feat_dim: 80 | |
pooling_func: TSTP | |
two_emb_layer: false | |
data_type: shard | |
dataloader_args: | |
batch_size: 32 | |
drop_last: true | |
num_workers: 16 | |
pin_memory: false | |
prefetch_factor: 8 | |
dataset_args: | |
aug_prob: 0.6 | |
fbank_args: | |
dither: 1.0 | |
frame_length: 25 | |
frame_shift: 10 | |
num_mel_bins: 80 | |
num_frms: 200 | |
shuffle: true | |
shuffle_args: | |
shuffle_size: 2500 | |
spec_aug: false | |
spec_aug_args: | |
max_f: 8 | |
max_t: 10 | |
num_f_mask: 1 | |
num_t_mask: 1 | |
prob: 0.6 | |
speed_perturb: true | |
exp_dir: exp/ResNet293-TSTP-emb256-fbank80-num_frms200-aug0.6-spTrue-saFalse-ArcMargin-SGD-epoch150 | |
gpus: | |
- 0 | |
- 1 | |
log_batch_interval: 100 | |
loss: CrossEntropyLoss | |
loss_args: {} | |
margin_scheduler: MarginScheduler | |
margin_update: | |
epoch_iter: 17062 | |
final_margin: 0.2 | |
fix_start_epoch: 40 | |
increase_start_epoch: 20 | |
increase_type: exp | |
initial_margin: 0.0 | |
update_margin: true | |
model_init: null | |
noise_data: data/musan/lmdb | |
num_avg: 2 | |
num_epochs: 150 | |
optimizer: SGD | |
optimizer_args: | |
lr: 0.1 | |
momentum: 0.9 | |
nesterov: true | |
weight_decay: 0.0001 | |
projection_args: | |
easy_margin: false | |
embed_dim: 256 | |
num_class: 17982 | |
project_type: arc_margin | |
scale: 32.0 | |
reverb_data: data/rirs/lmdb | |
save_epoch_interval: 5 | |
scheduler: ExponentialDecrease | |
scheduler_args: | |
epoch_iter: 17062 | |
final_lr: 5.0e-05 | |
initial_lr: 0.1 | |
num_epochs: 150 | |
scale_ratio: 1.0 | |
warm_from_zero: true | |
warm_up_epoch: 6 | |
seed: 42 | |
train_data: data/vox2_dev/shard.list | |
train_label: data/vox2_dev/utt2spk | |