Spaces:

amphion
/

singing_voice_conversion

Running on A10G

App Files Files Community

singing_voice_conversion / egs /vocoder /gan /exp_config_base.json

RMSnow

add backend inference and interface output

0883aa1 7 months ago

raw

history blame contribute delete

No virus

2.5 kB

	{
		// Experiment config for a GAN vocoder.
		// Settings not given here presumably fall back to the file named in
		// "base_config" -- confirm against the config loader.
		// NOTE: this file contains comments and trailing commas, so it must be
		// read with a JSON5-style parser, not a strict JSON one.
		"base_config": "config/vocoder.json",
		"model_type": "GANVocoder",
		// TODO: Choose your needed datasets
		"dataset": [
			"csd",
			"kising",
			"m4singer",
			"nus48e",
			"opencpop",
			"opensinger",
			"opera",
			"pjs",
			"popbutfy",
			"popcs",
			"ljspeech",
			"vctk",
			"libritts",
		],
		// Keys below mirror the names listed in "dataset" above.
		"dataset_path": {
			// TODO: Fill in your dataset path
			"csd": "[dataset path]",
			"kising": "[dataset path]",
			"m4singer": "[dataset path]",
			"nus48e": "[dataset path]",
			"opencpop": "[dataset path]",
			"opensinger": "[dataset path]",
			"opera": "[dataset path]",
			"pjs": "[dataset path]",
			"popbutfy": "[dataset path]",
			"popcs": "[dataset path]",
			"ljspeech": "[dataset path]",
			"vctk": "[dataset path]",
			"libritts": "[dataset path]",
		},
		// TODO: Fill in the output log path
		"log_dir": "ckpts/vocoder",
		"preprocess": {
			// Acoustic features
			"extract_mel": true,
			"extract_audio": true,
			"extract_pitch": false,
			"extract_uv": false,
			"pitch_extractor": "parselmouth",

			// Features used for model training
			"use_mel": true,
			"use_frame_pitch": false,
			"use_uv": false,
			"use_audio": true,

			// TODO: Fill in the output data path
			"processed_dir": "data/",
			"n_mel": 100,
			"sample_rate": 24000
		},
		"model": {
			// TODO: Choose your needed discriminators
			"discriminators": [
				"msd",
				"mpd",
				"msstftd",
				"mssbcqtd",
			],
			// Per-discriminator settings, keyed by discriminator name.
			// NOTE(review): "mrd" has settings here but is not in the
			// "discriminators" list above, and "msd" is listed but has no
			// settings block in this file -- confirm both are intentional.
			"mpd": {
				"mpd_reshapes": [2, 3, 5, 7, 11],
				"use_spectral_norm": false,
				"discriminator_channel_mult_factor": 1
			},
			"mrd": {
				"resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]],
				"use_spectral_norm": false,
				"discriminator_channel_mult_factor": 1,
				"mrd_override": false
			},
			"msstftd": {
				"filters": 32
			},
			// Keys are quoted like everywhere else in this file; the three-element
			// lists presumably describe one sub-discriminator each -- TODO confirm.
			"mssbcqtd": {
				"hop_lengths": [512, 256, 256],
				"filters": 32,
				"max_filters": 1024,
				"filters_scale": 1,
				"dilations": [1, 2, 4],
				"in_channels": 1,
				"out_channels": 1,
				"n_octaves": [9, 9, 9],
				"bins_per_octaves": [24, 36, 48]
			},
		},
		"train": {
			// TODO: Choose a suitable batch size, training epoch, and save stride
			"batch_size": 32,
			"max_epoch": 1000000,
			"save_checkpoint_stride": [20],
			"adamw": {
				"lr": 2.0e-4,
				"adam_b1": 0.8,
				"adam_b2": 0.99
			},
			"exponential_lr": {
				"lr_decay": 0.999
			},
		}
	}