# Generated 2023-07-14 from:
# /data2/cloned_repos/speechbrain-clone/recipes/ESC50/interpret/hparams/piq.yaml
# yamllint disable
# #################################
# The recipe for training PIQ (Posthoc Interpretation via Quantization) on the ESC50 dataset.
#
# Author:
# * Cem Subakan 2022, 2023
# * Francesco Paissan 2022, 2023
# (based on the SpeechBrain UrbanSound8k recipe)
# #################################
# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 1234
__set_seed: !!python/object/apply:torch.manual_seed [1234]
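# Note (explanatory, not an extra hyperparameter): the `!!python/object/apply`
# tag above makes loading this file call torch.manual_seed(1234) as a side
# effect; the resulting `__set_seed` entry is not referenced anywhere else.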
# Set up folders for reading from and writing to
# Dataset must already exist at `audio_data_folder`
data_folder: /data2/ESC-50-master
# e.g., /localscratch/ESC-50-master
audio_data_folder: /data2/ESC-50-master/audio
experiment_name: piq
output_folder: ./results/piq/1234
save_folder: ./results/piq/1234/save
train_log: ./results/piq/1234/train_log.txt
test_only: false
save_interpretations: true
interpret_period: 10
# Tensorboard logs
use_tensorboard: false
tensorboard_logs_folder: ./results/piq/1234/tb_logs/
# Path where data manifest files will be stored
train_annotation: /data2/ESC-50-master/manifest/train.json
valid_annotation: /data2/ESC-50-master/manifest/valid.json
test_annotation: /data2/ESC-50-master/manifest/test.json
# To standardize results, ESC-50 comes pre-split into
# 5 folds for multi-fold validation
train_fold_nums: [1, 2, 3]
valid_fold_nums: [4]
test_fold_nums: [5]
skip_manifest_creation: false
ckpt_interval_minutes: 15 # save checkpoint every N min
# Training parameters
number_of_epochs: 200
batch_size: 16
lr: 0.0002
sample_rate: 16000
use_vq: true
rec_loss_coef: 1
use_mask_output: true
mask_th: 0.35
device: cuda
# Feature parameters
n_mels: 80
# Number of classes
out_n_neurons: 50
shuffle: true
dataloader_options:
  batch_size: 16
  shuffle: true
  num_workers: 0
epoch_counter: &id001 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 200
opt_class: !name:torch.optim.Adam
  lr: 0.0002
  weight_decay: 0.000002
lr_annealing: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
  factor: 0.5
  patience: 3
  dont_halve_until_epoch: 100
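# Note on the scheduler above: with these settings the learning rate is
# multiplied by `factor` (0.5) whenever the tracked validation metric has not
# improved for `patience` (3) consecutive epochs, and no reduction is applied
# before epoch 100 (`dont_halve_until_epoch`).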
# Logging + checkpoints
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: ./results/piq/1234/train_log.txt
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: ./results/piq/1234/save
  recoverables:
    psi_model: &id004 !new:speechbrain.lobes.models.PIQ.VectorQuantizedPSI_Audio
      dim: 256
      K: 1024
      shared_keys: 0
      activate_class_partitioning: true
      use_adapter: true
      adapter_reduce_dim: true
    counter: *id001
use_pretrained: true
# embedding_model: !new:custom_models.Conv2dEncoder_v2
embedding_model: &id002 !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
  dim: 256
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 256
  out_neurons: 50
  lin_blocks: 1
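# A rough reading of the model wiring above (an interpretation of this config,
# not an excerpt from the training script): Conv2dEncoder_v2 produces
# 256-dimensional embeddings (dim: 256), the ECAPA-style Classifier maps those
# embeddings to the 50 ESC-50 classes (input_size: 256, out_neurons: 50), and
# VectorQuantizedPSI_Audio is the PIQ interpreter trained on top of them, with
# a codebook of K: 1024 entries of dimension 256.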
# Interpretation hyperparams
K: 1024
# pre-processing
n_fft: 1024
spec_mag_power: 0.5
hop_length: 11.6099
win_length: 23.2199
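# Note: in speechbrain.processing.features the STFT/ISTFT window and hop
# lengths are specified in milliseconds, so at the 16 kHz sample rate above
# win_length corresponds to roughly 23.22 ms * 16 ≈ 372 samples and hop_length
# to roughly 11.61 ms * 16 ≈ 186 samples (exact counts depend on SpeechBrain's
# internal rounding).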
compute_stft: &id005 !new:speechbrain.processing.features.STFT
  n_fft: 1024
  hop_length: 11.6099
  win_length: 23.2199
  sample_rate: 16000
compute_fbank: &id006 !new:speechbrain.processing.features.Filterbank
  n_mels: 80
  n_fft: 1024
  sample_rate: 16000
compute_istft: &id007 !new:speechbrain.processing.features.ISTFT
  sample_rate: 16000
  hop_length: 11.6099
  win_length: 23.2199
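# A sketch of how the three feature modules above are typically used in this
# recipe (an assumption based on this config, not a verbatim excerpt of the
# training script): compute_stft followed by compute_fbank (with magnitudes
# raised to spec_mag_power) turns the waveform into the mel features consumed
# by the embedding model, while compute_istft converts the masked STFT
# produced by the interpreter back into a listenable interpretation signal.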
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
psi_model: *id004
modules:
  compute_stft: *id005
  compute_fbank: *id006
  compute_istft: *id007
  psi: *id004
  embedding_model: !ref <embedding_model>
  classifier: !ref <classifier>
embedding_model_path: fpaissan/conv2d_us8k/embedding_modelft.ckpt
classifier_model_path: fpaissan/conv2d_us8k/classifier.ckpt
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    embedding_model: !ref <embedding_model>
    classifier: !ref <classifier>
    psi: !ref <psi_model>
    label_encoder: !ref <label_encoder>
  paths:
    embedding_model: fpaissan/conv2d_us8k/embedding_modelft.ckpt
    classifier: fpaissan/conv2d_us8k/classifier.ckpt
    psi: /data2/PIQ-ESC50/psi_model.ckpt
    label_encoder: speechbrain/cnn14-esc50/label_encoder.txt
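# ---------------------------------------------------------------------------
# Usage note (not part of the training configuration): a minimal sketch of how
# a file like this is typically consumed with SpeechBrain, assuming
# hyperpyyaml and speechbrain are installed. The snippet is illustrative and
# kept commented out so the YAML stays valid.
#
#   from hyperpyyaml import load_hyperpyyaml
#
#   with open("hyperparams.yaml") as f:
#       hparams = load_hyperpyyaml(f)
#
#   # Fetch the files listed under `pretrainer: paths` and load them into the
#   # objects listed under `pretrainer: loadables`.
#   hparams["pretrainer"].collect_files()
#   hparams["pretrainer"].load_collected()
# ---------------------------------------------------------------------------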