# Generated 2022-09-22 from:
# /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
# yamllint disable
# ################################
# Model: language identification with ECAPA
# Authors: Tanel Alumäe, 2021
# ################################
# Basic parameters
seed: 1988
__set_seed: !apply:torch.manual_seed [1988]
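# Note: in HyperPyYAML, !apply: calls the referenced function at load time,
# so torch.manual_seed(1988) runs as soon as this file is parsed.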
output_folder: results/epaca/1988
save_folder: results/epaca/1988/save
train_log: results/epaca/1988/train_log.txt
data_folder: ./
rir_folder: ./
shards_url: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards
train_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/meta.json
val_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/meta.json
train_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/shard-{000000..000009}.tar
val_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/shard-000000.tar
# Set to directory on a large disk if you are training on Webdataset shards hosted on the web
#shard_cache_dir:
ckpt_interval_minutes: 5
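# SpeechBrain also writes an intra-epoch checkpoint roughly every
# ckpt_interval_minutes during training, not only at epoch boundaries.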
# Training parameters
number_of_epochs: 1
lr: 0.001
lr_final: 0.0001
sample_rate: 16000
sentence_len: 3 # seconds
# Feature parameters
n_mels: 60
left_frames: 0
right_frames: 0
deltas: false
# Number of languages
out_n_neurons: 2
train_dataloader_options:
  num_workers: 2
  batch_size: 128
val_dataloader_options:
  num_workers: 0
  batch_size: 32
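# Both option dicts are passed through as keyword arguments when the
# train/validation dataloaders are built.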
# Functions
compute_features: &id003 !new:speechbrain.lobes.features.Fbank
  n_mels: 60
  left_frames: 0
  right_frames: 0
  deltas: false
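# The &idNNN markers are plain YAML anchors: the objects defined here are
# re-used via *idNNN aliases in the modules: dict below, so each module
# is instantiated exactly once.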
embedding_model: &id004 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 60
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 128
  lin_neurons: 256
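# ECAPA-TDNN backbone: five TDNN/Res2Net blocks with the channel widths,
# kernel sizes and dilations listed above, followed by attentive statistics
# pooling into 256-dimensional (lin_neurons) embeddings.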
classifier: &id005 !new:speechbrain.lobes.models.Xvector.Classifier
  input_shape: [null, null, 256]
  activation: !name:torch.nn.LeakyReLU
  lin_blocks: 1
  lin_neurons: 512
  out_neurons: 2
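# Maps the 256-dim embeddings to out_neurons scores; SpeechBrain's
# Xvector.Classifier ends in a log-softmax, which pairs with the
# nll_loss used as compute_cost below.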
epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 1
augment_speed: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
  speeds: [90, 100, 110]
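# speeds are percentages of the original rate, i.e. resampling-based
# speed perturbation at 0.9x, 1.0x and 1.1x.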
add_rev_noise: &id002 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: ./
  openrir_max_noise_len: 3.0 # seconds
  reverb_prob: 0.5
  noise_prob: 0.8
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0
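# EnvCorrupt convolves signals with room impulse responses and mixes in
# noise from the OpenRIR corpus (fetched into openrir_folder if missing),
# at SNRs drawn between noise_snr_low and noise_snr_high dB.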
# Definition of the augmentation pipeline.
# If concat_augment = False, the augmentation techniques are applied
# in sequence. If concat_augment = True, all the augmented signals
# are concatenated in a single big batch.
augment_pipeline: [*id001, *id002]
concat_augment: false
mean_var_norm: &id006 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false
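# norm_type: sentence normalizes each utterance with its own statistics;
# std_norm: false means only the mean is removed, not the variance.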
modules:
  compute_features: *id003
  augment_speed: *id001
  add_rev_noise: *id002
  embedding_model: *id004
  classifier: *id005
  mean_var_norm: *id006
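# The modules: dict is wrapped in a torch.nn.ModuleDict by the Brain class,
# so all parameters are moved to the training device together.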
compute_cost: !name:speechbrain.nnet.losses.nll_loss
# compute_error: !name:speechbrain.nnet.losses.classification_error
opt_class: !name:torch.optim.Adam
  lr: 0.001
  weight_decay: 0.000002
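# !name: stores a callable without invoking it; the Brain class constructs
# the optimizer later, passing in the model parameters.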
lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
  initial_value: 0.001
  final_value: 0.0001
  epoch_count: 1
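# Linearly anneals the learning rate from initial_value to final_value
# over epoch_count epochs.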
# Logging + checkpoints
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/epaca/1988/train_log.txt
error_stats: !name:speechbrain.utils.metric_stats.MetricStats
  metric: !name:speechbrain.nnet.losses.classification_error
    reduction: batch
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/epaca/1988/save
  recoverables:
    embedding_model: *id004
    classifier: *id005
    normalizer: *id006
    counter: *id007
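# Everything under recoverables: is serialized to checkpoints_dir and
# restored automatically when training resumes.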