RMSnow's picture
add backend inference and interface output
0883aa1
raw
history blame
2.19 kB
{
"base_config": "egs/vocoder/gan/exp_config_base.json",
"model_type": "GANVocoder",
"dataset": [
"ljspeech",
"vctk",
"libritts",
],
"dataset_path": {
// TODO: Replace each "[dataset path]" placeholder below with the actual path to that dataset
"ljspeech": "[dataset path]",
"vctk": "[dataset path]",
"libritts": "[dataset path]",
},
// TODO: Fill in the output log directory. The default "ckpts/vocoder" corresponds to "Amphion/ckpts/vocoder" (presumably relative to the Amphion root directory)
"log_dir": "ckpts/vocoder",
"preprocess": {
// TODO: Fill in the output directory for processed data. The default "data" corresponds to "Amphion/data" (presumably relative to the Amphion root directory)
"processed_dir": "data",
// Acoustic features to extract during preprocessing
"extract_mel": true,
"extract_audio": true,
"extract_pitch": false,
"extract_uv": false,
"extract_amplitude_phase": false,
"pitch_extractor": "parselmouth",
// Features used for model training
"use_mel": true,
"use_frame_pitch": false,
"use_uv": false,
"use_audio": true,
"n_mel": 100,
"sample_rate": 24000
},
"model": {
"generator": "hifigan",
"discriminators": [
"msd",
"mpd",
"mssbcqtd",
"msstftd",
],
"hifigan": {
"resblock": "1",
"upsample_rates": [
8,
4,
2,
2,
2
],
"upsample_kernel_sizes": [
16,
8,
4,
4,
4
],
"upsample_initial_channel": 768,
"resblock_kernel_sizes": [
3,
5,
7
],
"resblock_dilation_sizes": [
[
1,
3,
5
],
[
1,
3,
5
],
[
1,
3,
5
]
]
},
"mpd": {
"mpd_reshapes": [
2,
3,
5,
7,
11,
17,
23,
37
],
"use_spectral_norm": false,
"discriminator_channel_multi": 1
}
},
"train": {
"batch_size": 16,
"adamw": {
"lr": 2.0e-4,
"adam_b1": 0.8,
"adam_b2": 0.99
},
"exponential_lr": {
"lr_decay": 0.999
},
"criterions": [
"feature",
"discriminator",
"generator",
"mel",
]
},
"inference": {
"batch_size": 1,
}
}