# Generated 2022-09-26 from:
# /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
# yamllint disable
# ################################
# Model: language identification with ECAPA
# Authors: Tanel Alumäe, 2021
# ################################
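
# NOTE: this looks like the resolved hyperparameter copy that SpeechBrain
# dumps into the output folder: the &id00x anchors and *id00x aliases below
# replace the !ref links of the source recipe, so shared values (lr, n_mels,
# out_n_neurons, ...) appear as repeated literals.
#
# A minimal sketch of loading such a file (assuming the standard
# hyperpyyaml API):
#
#   from hyperpyyaml import load_hyperpyyaml
#   with open("train_ecapa.yaml") as f:
#       hparams = load_hyperpyyaml(f)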
# Basic parameters
seed: 1988
__set_seed: !apply:torch.manual_seed [1988]
output_folder: results/epaca/1988
save_folder: results/epaca/1988/save
train_log: results/epaca/1988/train_log.txt
data_folder: ./
rir_folder: ./

shards_url: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards
train_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/meta.json
val_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/meta.json
train_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/shard-{000000..000009}.tar
val_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/shard-000000.tar
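# NOTE: {000000..000009} is WebDataset brace notation; it expands to
# shard-000000.tar through shard-000009.tar.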

# Set to a directory on a large disk if you are training on WebDataset shards hosted on the web
#shard_cache_dir:

ckpt_interval_minutes: 5
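# (a checkpoint is also written every 5 minutes within an epoch, so
# interrupted runs can resume)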

# Training parameters
number_of_epochs: 40
lr: 0.001
lr_final: 0.0001
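# (lr decays linearly to lr_final over training; see lr_annealing below)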
sample_rate: 16000
sentence_len: 3 # seconds

# Feature parameters
n_mels: 60
left_frames: 0
right_frames: 0
deltas: false

# Number of languages
out_n_neurons: 2

train_dataloader_options:
  num_workers: 2
  batch_size: 128

val_dataloader_options:
  num_workers: 0
  batch_size: 32

# Functions
compute_features: &id003 !new:speechbrain.lobes.features.Fbank
  n_mels: 60
  left_frames: 0
  right_frames: 0
  deltas: false

embedding_model: &id004 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 60
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 128
  lin_neurons: 256
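  # The five channels/kernel_sizes/dilations entries correspond to the
  # initial TDNN layer, three SE-Res2Net blocks, and the 3072-channel layer
  # feeding attentive statistics pooling; lin_neurons is the size of the
  # extracted language embedding.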

classifier: &id005 !new:speechbrain.lobes.models.Xvector.Classifier
  input_shape: [null, null, 256]
  activation: !name:torch.nn.LeakyReLU
  lin_blocks: 1
  lin_neurons: 512
  out_neurons: 2
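  # NOTE: out_neurons repeats out_n_neurons above (the dump resolved the
  # original !ref); keep the two in sync when changing the language count.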

epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 40


augment_speed: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
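  # speed factors are percentages of the original rate (100 = unchanged)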
  speeds: [90, 100, 110]


add_rev_noise: &id002 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: ./
  openrir_max_noise_len: 3.0    # seconds
  reverb_prob: 0.5
  noise_prob: 0.8
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0
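  # NOTE: if openrir_folder does not already hold the OpenRIR corpus,
  # EnvCorrupt downloads and prepares it there on first use.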

# Definition of the augmentation pipeline.
# If concat_augment = False, the augmentation techniques are applied
# in sequence. If concat_augment = True, all the augmented signals
# are concatenated in a single big batch.
augment_pipeline: [*id001, *id002]

concat_augment: false

mean_var_norm: &id006 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

modules:
  compute_features: *id003
  augment_speed: *id001
  add_rev_noise: *id002
  embedding_model: *id004
  classifier: *id005
  mean_var_norm: *id006

compute_cost: !name:speechbrain.nnet.losses.nll_loss
# compute_error: !name:speechbrain.nnet.losses.classification_error

opt_class: !name:torch.optim.Adam
  lr: 0.001
  weight_decay: 0.000002

lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
  initial_value: 0.001
  final_value: 0.0001
  epoch_count: 40
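# NOTE: initial_value, final_value and epoch_count mirror lr, lr_final and
# number_of_epochs above (again resolved from !ref by the dump).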

# Logging + checkpoints
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/epaca/1988/train_log.txt


error_stats: !name:speechbrain.utils.metric_stats.MetricStats
  metric: !name:speechbrain.nnet.losses.classification_error
    reduction: batch

checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/epaca/1988/save
  recoverables:
    embedding_model: *id004
    classifier: *id005
    normalizer: *id006
    counter: *id007
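
# Everything listed under recoverables is saved with each checkpoint and
# restored by the checkpointer when training resumes.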