language-identification / hyperparams.yaml

Upload hyperparams.yaml

f5358b4 about 2 years ago

5.26 kB

	# Generated 2022-10-17 from:
	# /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
	# yamllint disable
	################################
	# Model: language identification with ECAPA
	# Authors: Tanel Alumäe, 2021
	################################

	# Basic parameters
	# Random seed recorded for this run. The same literal is handed to
	# torch.manual_seed by the `!apply` entry on the next line.
	seed: 1988
	__set_seed: !apply:torch.manual_seed [1988]
	# Output locations for this experiment. save_folder and train_log both sit
	# underneath output_folder; the same literal paths are repeated in the
	# train_logger and checkpointer entries further down in this file.
	# NOTE(review): "epaca" looks like a transposition of "ecapa". It is left
	# untouched here because it is a runtime path - confirm before renaming.
	output_folder: results_3lang/epaca/1988
	save_folder: results_3lang/epaca/1988/save
	train_log: results_3lang/epaca/1988/train_log.txt
	# Both folders point at the current working directory.
	# NOTE(review): rir_folder is presumably the noise / impulse-response data
	# root (add_rev_noise below uses the same "./" for openrir_folder) - confirm.
	data_folder: ./
	rir_folder: ./

	# Locations of the sharded training / validation data on the local
	# filesystem. All four paths below live under shards_url.
	shards_url: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards
	train_meta: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards/train/meta.json
	val_meta: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards/dev/meta.json
	# The {000000..000013} range names 14 shard files.
	# NOTE(review): the brace pattern is presumably expanded by the shard
	# reader, not by YAML - verify against the training script.
	train_shards: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards/train/shard-{000000..000013}.tar
	val_shards: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards/dev/shard-000000.tar

	# Set to a directory on a large disk if you are training on WebDataset shards hosted on the web.
	# Left commented out here: every shard path above is a local filesystem path.
	#shard_cache_dir:

	# Checkpoint interval in minutes.
	# NOTE(review): presumably consumed by the training script to schedule
	# intra-epoch checkpoints; nothing in this file references it - confirm.
	ckpt_interval_minutes: 5

	# Training parameters
	# This file is a resolved dump, so the values below are repeated as literals
	# elsewhere rather than referenced: 40 also appears as epoch_counter.limit
	# and lr_annealing.epoch_count; 0.001 as opt_class.lr and
	# lr_annealing.initial_value; 0.0001 as lr_annealing.final_value; 16000 as
	# augment_speed.sample_rate. Editing a value here does not update those copies.
	number_of_epochs: 40
	lr: 0.001
	lr_final: 0.0001
	sample_rate: 16000
	sentence_len: 3 # seconds

	# Feature parameters
	# The same four literals are repeated as the arguments of compute_features
	# below, and 60 is also embedding_model.input_size. They are copies, not
	# references, so keep them in sync by hand.
	n_mels: 60
	left_frames: 0
	right_frames: 0
	deltas: false

	# Number of languages
	# The same literal (3) is repeated as classifier.out_neurons below; the
	# output paths in this file also carry a "3lang" component.
	out_n_neurons: 3

	# Options for the training data loader.
	# num_workers and batch_size are children of this key. With the nesting
	# lost they become top-level keys, leaving train_dataloader_options null
	# and clashing with the identically named keys of val_dataloader_options.
	train_dataloader_options:
	  num_workers: 0
	  batch_size: 32

	# Options for the validation data loader.
	# num_workers and batch_size are children of this key. With the nesting
	# lost they become top-level keys, leaving val_dataloader_options null
	# and clashing with the identically named keys of train_dataloader_options.
	val_dataloader_options:
	  num_workers: 0
	  batch_size: 16

	# Functions
	# Feature extractor: `!new:` instantiates speechbrain.lobes.features.Fbank
	# with the nested keys as keyword arguments. The anchor &id003 lets the
	# `modules` mapping further down refer to this same object.
	# The keyword arguments must be indented under the tagged key; at the
	# parent's level they are unrelated top-level keys and Fbank receives none.
	compute_features: &id003 !new:speechbrain.lobes.features.Fbank
	  n_mels: 60
	  left_frames: 0
	  right_frames: 0
	  deltas: false

	# Embedding network: `!new:` instantiates ECAPA_TDNN with the nested keys
	# as keyword arguments. Anchor &id004 is referenced from `modules` and from
	# the checkpointer's recoverables further down.
	# input_size (60) equals n_mels of the feature extractor, and lin_neurons
	# (256) equals the last dimension of classifier.input_shape.
	# The three per-layer lists each have five entries.
	embedding_model: &id004 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
	  input_size: 60
	  channels: [1024, 1024, 1024, 1024, 3072]
	  kernel_sizes: [5, 3, 3, 3, 1]
	  dilations: [1, 2, 3, 4, 1]
	  attention_channels: 128
	  lin_neurons: 256

	# Classification head: `!new:` instantiates Xvector.Classifier with the
	# nested keys as keyword arguments. Anchor &id005 is referenced from
	# `modules` and from the checkpointer's recoverables further down.
	# The last entry of input_shape (256) equals embedding_model.lin_neurons;
	# out_neurons (3) equals out_n_neurons above.
	# `activation` uses `!name:`, so the LeakyReLU class itself is passed
	# rather than an instance.
	classifier: &id005 !new:speechbrain.lobes.models.Xvector.Classifier
	  input_shape: [null, null, 256]
	  activation: !name:torch.nn.LeakyReLU
	  lin_blocks: 1
	  lin_neurons: 512
	  out_neurons: 3

	# Epoch counter: `!new:` instantiates EpochCounter with `limit` as its
	# keyword argument (same literal as number_of_epochs above). Anchor &id007
	# is referenced as `counter` in the checkpointer's recoverables.
	# `limit` must be nested; at the parent's level it is a stray top-level key.
	epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
	  limit: 40


	# Speed-perturbation augmentation: `!new:` instantiates
	# TimeDomainSpecAugment with the nested keys as keyword arguments.
	# Anchor &id001 is referenced from `modules` further down.
	# NOTE(review): speeds are presumably percentages of the original rate
	# (90 % / 100 % / 110 %) - confirm against the SpeechBrain API.
	augment_speed: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
	  sample_rate: 16000
	  speeds: [90, 100, 110]


	# Reverberation + additive-noise augmentation: `!new:` instantiates
	# EnvCorrupt with the nested keys as keyword arguments. Anchor &id002 is
	# referenced from `modules` further down.
	# openrir_folder repeats the literal used for rir_folder above ("./").
	# NOTE(review): noise_snr_low / noise_snr_high are presumably in dB - confirm.
	add_rev_noise: &id002 !new:speechbrain.lobes.augment.EnvCorrupt
	  openrir_folder: ./
	  openrir_max_noise_len: 3.0 # seconds
	  reverb_prob: 0.5
	  noise_prob: 0.8
	  noise_snr_low: 0
	  noise_snr_high: 15
	  rir_scale_factor: 1.0

	# Definition of the augmentation pipeline.
	# If concat_augment = False, the augmentation techniques are applied
	# in sequence. If concat_augment = True, all the augmented signals
	# are concatenated in a single big batch.
	# The entries are YAML aliases to the objects anchored as &id001
	# (augment_speed) and &id002 (add_rev_noise). The leading `*` is required:
	# without it the list holds the plain strings "id001" and "id002".
	augment_pipeline: [*id001, *id002]

	concat_augment: false

	# Input normalization: `!new:` instantiates InputNormalization with the
	# nested keys as keyword arguments. Anchor &id006 is referenced from
	# `modules` and, as `normalizer`, from the checkpointer's recoverables.
	# The arguments must follow the tagged key directly and be indented under
	# it; otherwise they are unrelated top-level keys.
	mean_var_norm: &id006 !new:speechbrain.processing.features.InputNormalization
	  norm_type: sentence
	  std_norm: false

	# Mapping of module name -> object. Every value is an alias to an object
	# anchored earlier in this file, so the same instances are shared rather
	# than re-created. The entries must be nested under `modules`; at top level
	# they would redefine compute_features, embedding_model, etc. as duplicate
	# keys and leave `modules` null.
	modules:
	  compute_features: *id003
	  augment_speed: *id001
	  add_rev_noise: *id002
	  embedding_model: *id004
	  classifier: *id005
	  mean_var_norm: *id006
	# Loss function. `!name:` stores a reference to the callable
	# (speechbrain.nnet.losses.nll_loss) instead of calling it at load time.
	compute_cost: !name:speechbrain.nnet.losses.nll_loss
	# Disabled: classification_error is still referenced by error_stats below.
	# compute_error: !name:speechbrain.nnet.losses.classification_error

	# Optimizer factory. `!name:` stores torch.optim.Adam with the nested keys
	# pre-bound as keyword arguments, without instantiating it here.
	# lr repeats the literal of the top-level `lr` above. It must be nested:
	# at the parent's level it is a duplicate of that top-level `lr` key and
	# Adam receives no arguments.
	opt_class: !name:torch.optim.Adam
	  lr: 0.001
	  weight_decay: 0.000002

	# Learning-rate schedule: `!new:` instantiates LinearScheduler with the
	# nested keys as keyword arguments. The three values repeat the literals
	# of lr, lr_final and number_of_epochs from the training parameters above.
	lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
	  initial_value: 0.001
	  final_value: 0.0001
	  epoch_count: 40

	# Logging + checkpoints
	# File logger: `!new:` instantiates FileTrainLogger with save_file as its
	# keyword argument. The path repeats the literal of train_log above.
	train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
	  save_file: results_3lang/epaca/1988/train_log.txt


	# Error-rate tracker factory. `!name:` stores MetricStats with `metric`
	# pre-bound, without instantiating it here. `metric` is itself a `!name:`
	# reference to classification_error with `reduction` pre-bound, hence the
	# two levels of nesting.
	# NOTE(review): `reduction` is placed under `metric` on the assumption that
	# it is an argument of classification_error, not of MetricStats - confirm
	# against the SpeechBrain API.
	error_stats: !name:speechbrain.utils.metric_stats.MetricStats
	  metric: !name:speechbrain.nnet.losses.classification_error
	    reduction: batch

	# Checkpointer: `!new:` instantiates Checkpointer with the nested keys as
	# keyword arguments. checkpoints_dir repeats the literal of save_folder
	# above. `recoverables` maps a name to each object that is saved/restored;
	# every value is an alias to an object anchored earlier in this file
	# (&id004 embedding_model, &id005 classifier, &id006 mean_var_norm,
	# &id007 epoch_counter). The entries must be nested under `recoverables`:
	# at top level they would clash with the embedding_model and classifier
	# definitions and leave `recoverables` null.
	checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
	  checkpoints_dir: results_3lang/epaca/1988/save
	  recoverables:
	    embedding_model: *id004
	    classifier: *id005
	    normalizer: *id006
	    counter: *id007