Spaces:

devesg
/

singing_voice_conversion

Running

add backend inference and interface output

0883aa1 8 months ago

2.19 kB

	{
	"base_config": "egs/vocoder/gan/exp_config_base.json",
	"model_type": "GANVocoder",
	"dataset": [
	"ljspeech",
	"vctk",
	"libritts",
	],
	"dataset_path": {
	// TODO: Replace each "[dataset path]" placeholder below with that dataset's path
	"ljspeech": "[dataset path]",
	"vctk": "[dataset path]",
	"libritts": "[dataset path]",
	},
	// TODO: Fill in the output log path. The default value is "Amphion/ckpts/vocoder"
	"log_dir": "ckpts/vocoder",
	"preprocess": {
	// TODO: Fill in the output data path. The default value is "Amphion/data"
	"processed_dir": "data",
	// Acoustic features to extract
	"extract_mel": true,
	"extract_audio": true,
	"extract_pitch": false,
	"extract_uv": false,
	"extract_amplitude_phase": false,
	"pitch_extractor": "parselmouth",
	// Features used for model training
	"use_mel": true,
	"use_frame_pitch": false,
	"use_uv": false,
	"use_audio": true,
	"n_mel": 100,
	"sample_rate": 24000
	},
	"model": {
	"generator": "hifigan",
	"discriminators": [
	"msd",
	"mpd",
	"mssbcqtd",
	"msstftd",
	],
	"hifigan": {
	"resblock": "1",
	"upsample_rates": [
	8,
	4,
	2,
	2,
	2
	],
	"upsample_kernel_sizes": [
	16,
	8,
	4,
	4,
	4
	],
	"upsample_initial_channel": 768,
	"resblock_kernel_sizes": [
	3,
	5,
	7
	],
	"resblock_dilation_sizes": [
	[
	1,
	3,
	5
	],
	[
	1,
	3,
	5
	],
	[
	1,
	3,
	5
	]
	]
	},
	"mpd": {
	"mpd_reshapes": [
	2,
	3,
	5,
	7,
	11,
	17,
	23,
	37
	],
	"use_spectral_norm": false,
	"discriminator_channel_multi": 1
	}
	},
	"train": {
	"batch_size": 16,
	"adamw": {
	"lr": 2.0e-4,
	"adam_b1": 0.8,
	"adam_b2": 0.99
	},
	"exponential_lr": {
	"lr_decay": 0.999
	},
	"criterions": [
	"feature",
	"discriminator",
	"generator",
	"mel",
	]
	},
	"inference": {
	"batch_size": 1,
	}
	}