# Hugging Face Space: yl12053/so-vits-4.1-Special-Week-Remake
# Space status: Runtime error
# Revision 20c7e28 (over 1 year ago), file size 1.52 kB

	data:
	sampling_rate: 44100
	block_size: 512 # Equal to hop_length
	duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip
	encoder: 'vec768l12' # 'hubertsoft', 'vec256l9', 'vec768l12'
	cnhubertsoft_gate: 10
	encoder_sample_rate: 16000
	encoder_hop_size: 320
	encoder_out_channels: 768 # 256 if using 'hubertsoft'
	training_files: "filelists/train.txt"
	validation_files: "filelists/val.txt"
	extensions: # List of extension included in the data collection
	- wav
	unit_interpolate_mode: "nearest"
	model:
	type: 'Diffusion'
	n_layers: 20
	n_chans: 512
	n_hidden: 256
	use_pitch_aug: true
	n_spk: 1 # max number of different speakers
	device: cuda
	vocoder:
	type: 'nsf-hifigan'
	ckpt: 'pretrain/nsf_hifigan/model'
	infer:
	speedup: 10
	method: 'dpm-solver' # 'pndm' or 'dpm-solver'
	env:
	expdir: logs/44k/diffusion
	gpu_id: 0
	train:
	num_workers: 2 # If your cpu and gpu are both very strong, set to 0 may be faster!
	amp_dtype: fp32 # fp32, fp16 or bf16 (fp16 or bf16 may be faster if it is supported by your gpu)
	batch_size: 48
	cache_all_data: true # Save Internal-Memory or Graphics-Memory if it is false, but may be slow
	cache_device: 'cpu' # Set to 'cuda' to cache the data into the Graphics-Memory, fastest speed for strong gpu
	cache_fp16: true
	epochs: 100000
	interval_log: 10
	interval_val: 2000
	interval_force_save: 10000
	lr: 0.0002
	decay_step: 100000
	gamma: 0.5
	weight_decay: 0
	save_opt: false
	spk:
	'nyaru': 0