Cyanbox
/

Prompt-Singer

singing-voice-synthesis

Model card Files Files and versions Community

Prompt-Singer / codec /config.yaml

Cyanbox's picture

add codec

e657e39 verified 2 months ago

history blame contribute delete

No virus

2.78 kB


	########### model config ###########
	generator:
	name: SoundStream
	config:
	n_filters: 32
	D: 256
	#target_bandwidths: [6,] # [1, 1.5, 2, 4, 6] # [0.5, 1, 1.5, 2, 4, 6]
	target_bandwidths: [0.5, 1, 1.5, 2, 4]
	ratios: [8, 5, 4, 2] # downsampling by 320
	sample_rate: 16000
	bins: 1024

	# Discriminator list
	#d_list: ['mpd', 'msd', 'mfd']
	d_list: ['mfd']

	mfd:
	name: MultiFrequencyDiscriminator
	config:
	hop_lengths: [32, 64, 128, 256, 512, 1024]
	hidden_channels: [64, 128, 256, 512, 512, 512]
	domain: double
	mel_scale: true
	sample_rate: 16000

	mpd:
	name: MultiPeriodDiscriminator
	config:
	period_sizes: [2, 3, 5, 7, 11]
	period_kernel_size: 5

	msd:
	name: MultiScaleDiscriminator
	config:
	num_scales: 3
	pool_kernel_size: 4
	pool_stride: 2

	########### optimizer config ###########
	optimizer:
	g:
	name: AdamW
	config:
	lr: 2e-4
	betas: [0.8, 0.99]
	eps: 1.0e-6

	d:
	name: AdamW
	config:
	lr: 2e-4
	betas: [0.8, 0.99]
	eps: 1.0e-6

	lr_scheduler:
	g:
	name: ExponentialLR
	config:
	gamma: 0.999
	d:
	name: ExponentialLR
	config:
	gamma: 0.999

	########### criterion config ###########
	criterion:
	g_criterion:
	name: losses.generator_loss.GeneratorSTFTLoss
	config:
	use_mel_loss: false
	#adv_criterion: LeastDLoss
	adv_criterion: MSEGLoss
	mel_loss_weight: 45
	use_feature_match: true
	feat_match_loss_weight: 20
	use_full_stft_loss: true # Magnitude
	use_sub_stft_loss: true # PQMF loss
	full_stft_loss_weight: 1
	sub_stft_loss_weight: 1
	mel_scale_loss:
	sampling_rate: 16000
	n_fft: 1024
	num_mels: 80
	hop_size: 160
	win_size: 800
	fmin: 0
	full_multi_scale_stft_loss: # Full-band multi-scale STFT loss.
	fft_sizes: [512, 1024, 2048]
	win_sizes: [480, 960, 1200]
	hop_sizes: [120, 240, 300]
	sub_multi_scale_stft_loss: # Sub-band multi-scale STFT loss.
	num_bands: 6
	fft_sizes: [128, 256, 256]
	win_sizes: [80, 120, 200]
	hop_sizes: [20, 40, 50]

	d_criterion:
	name: losses.discriminator_loss.MSEDiscriminatorLoss
	config: null

	commit_loss_weight: 1. #1000

	########### training and data config ###########

	seed: 2333
	cudnn_deterministic: false
	tensorboard: true # whether to use tensorboard
	#checkpoint_interval: 5
	#summary_interval: 10
	#validation_interval: 10

	checkpoint_interval: 5000
	summary_interval: 100
	validation_interval: 5000

	num_epoches: 5000
	print_freq: 10
	discriminator_iter_start: 0 # start step after which we update discriminators
	num_ckpt_keep: 10

	segment_size: 24000
	audio_norm_scale: 1.0
	batch_size: 6
	num_workers: 8
	num_plots: 8