# File size: 3,678 Bytes
# 388541c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Generated 2023-05-14 from:
# /home/agorin/cryceleb2023/hparams/ecapa_voxceleb_basic.yaml
# yamllint disable
# ################################
# Model: Speaker identification with ECAPA for CryCeleb
# Authors: David Budaghyan
# ################################

# Save a training checkpoint every N minutes (consumed by the training
# script, not referenced elsewhere in this file).
ckpt_interval_minutes: 15 # save checkpoint every N min

##### SEED
# `__set_seed` runs at YAML-load time via !apply: to seed the RNGs.
# NOTE: this generated file inlines the literal 3011 instead of using a
# !ref, so `seed` and the !apply argument must be kept in sync manually.
seed: 3011
__set_seed: !apply:crybrain_config_utils.set_seed [3011]

# DataLoader
# `bs` duplicates the train batch_size below (generated file: values are
# inlined rather than !ref'd back to `bs`).
bs: 32
train_dataloader_options:
  batch_size: 32
  shuffle: true
val_dataloader_options:
  batch_size: 2
  shuffle: false

##### ESTIMATOR COMPONENTS
# Fbank (feature extractor): 80-dim mel filterbank features, no delta
# features, no left/right context frames. The top-level keys mirror the
# constructor arguments below (inlined, not !ref'd).
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
# Anchor id002 is aliased in `modules` below.
compute_features: &id002 !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  left_frames: 0
  right_frames: 0
  deltas: false

# ECAPA
# ECAPA-TDNN speaker-embedding encoder. `input_size` matches `n_mels` (80)
# and `lin_neurons` matches `emb_dim` (192). Anchor id001 is aliased in
# `pretrained_embedding_model`, `modules`, and `checkpointer` below.
emb_dim: 192
embedding_model: &id001 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 80
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  groups: [1, 1, 1, 1, 1]
  attention_channels: 128
  lin_neurons: 192

# If you do not want to use the pretrained encoder you can simply delete pretrained_encoder field.
pretrained_model_name: spkrec-ecapa-voxceleb
pretrained_embedding_model_path: speechbrain/spkrec-ecapa-voxceleb/embedding_model.ckpt
# Pretrainer that loads VoxCeleb-pretrained ECAPA weights into the
# embedding model (anchor id001 above). Downloaded/collected files land in
# `collect_in`. The path literal duplicates
# `pretrained_embedding_model_path` (inlined, not !ref'd).
pretrained_embedding_model: !new:speechbrain.utils.parameter_transfer.Pretrainer
  collect_in: ./experiments/ecapa_voxceleb_ft_basic/ckpts
  loadables:
    model: *id001
  paths:
    model: speechbrain/spkrec-ecapa-voxceleb/embedding_model.ckpt

# CLASSIFIER
# Number of target classes for the classification head.
n_classes: 348
# check-yaml disable


# Classification head over the 192-dim embeddings
# (input_size == emb_dim, out_neurons == n_classes; values inlined).
# Anchor id003 is aliased in `modules` and `checkpointer` below.
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 192
  out_neurons: 348

##### EPOCH COUNTER
# Upper bound on training epochs; the counter's state is checkpointed
# (anchor id005 is aliased in `checkpointer` below).
n_epochs: 1000
epoch_counter: &id005 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 1000

##### OPTIMIZER
# !name: wraps the Adam constructor (a factory, not an instance) so the
# Brain class can instantiate it with the model parameters. `lr` inlines
# the `start_lr` value above.
start_lr: 0.0001
opt_class: !name:torch.optim.Adam
  lr: 0.0001
  weight_decay: 0.000002

#####  LEARNING RATE SCHEDULERS
lrsched_name: cyclic
# one of:
#   onplateau
#   cyclic
lr_min: 0.0000000001
# `choose_lrsched` receives the full kwarg set; only the kwargs relevant
# to the selected scheduler type are used for initialization.
lr_scheduler: &id006 !apply:crybrain_config_utils.choose_lrsched
  lrsched_name: cyclic

  # onplateau (ReduceLROnPlateau)
  lr_min: 0.0000000001
  factor: 0.4
  patience: 10
  dont_halve_until_epoch: 35

  # cyclic (CyclicLRScheduler)
  base_lr: 0.00000001
  max_lr: 0.0001
  step_size: 100
  mode: triangular
  gamma: 1.0
  # Explicit null (was a bare `scale_fn:` relying on implicit empty -> null,
  # flagged by yamllint `empty-values`); the parsed value is unchanged.
  scale_fn: null
  scale_mode: cycle

sample_rate: 16000
# Per-utterance ("sentence") mean normalization of input features, with
# std normalization disabled. Anchor id004 is aliased in `modules` and
# `checkpointer` below. (Removed the stray blank line that separated the
# tagged key from its mapping body; parsed value is unchanged.)
mean_var_norm: &id004 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

# Modules dict handed to the Brain class; all entries alias the anchors
# defined above (Fbank, ECAPA encoder, classifier head, normalizer).
modules:
  compute_features: *id002
  embedding_model: *id001
  classifier: *id003
  mean_var_norm: *id004
# Additive-angular-margin (AAM) loss wrapped with log-softmax.
compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
  loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
    margin: 0.2
    scale: 30

# !name: wraps the class as a factory (no args); instantiated per-stage by
# the training script.
classification_stats: !name:speechbrain.utils.metric_stats.ClassificationStats

###################################################################
### OUTPUT PATHS ###

# must run from the directory which contains "experiments"
experiment_name: ecapa_voxceleb_ft_basic

# `experiment_dir` and the paths below inline the experiment name
# (generated file: no !ref composition).
experiment_dir: ./experiments/ecapa_voxceleb_ft_basic
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: ./experiments/ecapa_voxceleb_ft_basic/train_log.txt

# Checkpointer bundling all stateful components for save/recovery.
# Note the normalizer is registered under the key `normalizer` here but
# `mean_var_norm` in `modules` — recovery keys must stay stable across
# runs. `ckpt_interval_minutes` above presumably controls save frequency
# (wired in the training script, not here — verify against caller).
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: ./experiments/ecapa_voxceleb_ft_basic/ckpts
  recoverables:
    embedding_model: *id001
    classifier: *id003
    normalizer: *id004
    counter: *id005
    lr_scheduler: *id006