Spaces:

amphion
/

singing_voice_conversion

Running on A10G

singing_voice_conversion / config / autoencoderkl.json

add backend inference and interface output

0883aa1 7 months ago

No virus

1.32 kB

	{
		// AutoencoderKL configuration.
		// NOTE: this file carries // comments, so it is not strict JSON (RFC 8259);
		// it has to be read by a comment-tolerant loader (JSONC/JSON5 style).
		"base_config": "config/base.json",
		"model_type": "AutoencoderKL",
		"task_type": "tta",
		"dataset": ["AudioCaps"],
		// Preprocessing feature switches; every one is turned off in this config.
		"preprocess": {
			"use_spkid": false,
			"use_uv": false,
			"use_frame_pitch": false,
			"use_phone_pitch": false,
			"use_frame_energy": false,
			"use_phone_energy": false,
			"use_mel": false,
			"use_audio": false,
			"use_label": false,
			"use_one_hot": false
		},
		// Model section: "autoencoderkl" network settings and "loss" settings.
		"model": {
			"autoencoderkl": {
				"ch": 128,
				// One entry per level; presumably channel multipliers applied to "ch" (TODO confirm).
				"ch_mult": [1, 1, 2, 2, 4],
				"num_res_blocks": 2,
				"in_channels": 1,
				"z_channels": 4,
				"out_ch": 1,
				"double_z": true
			},
			"loss": {
				"kl_weight": 1e-8,
				"disc_weight": 0.5,
				"disc_factor": 1.0,
				"logvar_init": 0.0,
				"min_adapt_d_weight": 0.0,
				"max_adapt_d_weight": 10.0,
				"disc_start": 50001,
				"disc_in_channels": 1,
				"disc_num_layers": 3,
				"use_actnorm": false
			}
		},
		// Training section: "lronPlateau" (presumably a reduce-on-plateau learning-rate
		// schedule; verify against the trainer) and "adam" hyperparameters.
		"train": {
			"lronPlateau": {
				"factor": 0.9,
				"patience": 100,
				"min_lr": 4.0e-5,
				"verbose": true
			},
			"adam": {
				"lr": 4.0e-4,
				"betas": [0.9, 0.999],
				"weight_decay": 1.0e-2,
				"eps": 1.0e-8
			}
		}
	}