{
  "dataset_path": {
    "LJSpeech": "/home/datasets/LJSpeech-1.1"
  },
  "base_config": "config/base.json",
  "dataset": [
    "LJSpeech"
  ],
  "preprocess": {
    "trim_silence": false,
    "num_silent_frames": 8,
    "trim_fft_size": 512,
    "trim_hop_size": 128,
    "trim_top_db": 30,
    "extract_mel": true,
    "extract_mcep": false,
    "extract_pitch": true,
    "extract_uv": true,
    "pitch_norm": false,
    "extract_audio": true,
    "extract_label": false,
    "pitch_extractor": "parselmouth",
    "extract_energy": false,
    "energy_norm": false,
    "energy_extract_mode": "from_mel",
    "extract_duration": false,
    "mel_min_max_norm": false,
    "mu_law_norm": false,
    "extract_whisper_feature": false,
    "extract_contentvec_feature": false,
    "extract_mert_feature": false,
    "extract_wenet_feature": false,
    "n_mel": 80,
    "win_size": 1024,
    "hop_size": 256,
    "sample_rate": 22050,
    "n_fft": 1024,
    "fmin": 0,
    "fmax": 8000,
    "min_level_db": -115,
    "ref_level_db": 20,
    "bits": 8,
    "processed_dir": "processed_data",
    "trimmed_wav_dir": "trimmed_wavs",
    "wav_dir": "wavs",
    "audio_dir": "audios",
    "label_dir": "labels",
    "mel_dir": "mels",
    "mcep_dir": "mcep",
    "dur_dir": "durs",
    "lab_dir": "labs",
    "wenet_dir": "wenet",
    "contentvec_dir": "contentvec",
    "pitch_dir": "pitches",
    "energy_dir": "energys",
    "uv_dir": "uvs",
    "duration_dir": "duration",
    "phone_seq_file": "phone_seq_file",
    "file_lst": "file.lst",
    "train_file": "train.json",
    "valid_file": "test.json",
    "spk2id": "spk2id.json",
    "utt2spk": "utt2spk",
    "emo2id": "emo2id.json",
    "utt2emo": "utt2emo",
    "use_phn_seq": false,
    "use_lab": false,
    "use_mel": true,
    "use_wav": false,
    "use_phone_pitch": false,
    "use_log_scale_pitch": false,
    "use_phone_energy": false,
    "use_phone_duration": false,
    "use_log_scale_energy": false,
    "use_wenet": false,
    "use_dur": false,
    "use_spkid": false,
    "use_emoid": false,
    "use_frame_pitch": false,
    "use_uv": true,
    "use_frame_energy": false,
    "use_frame_duration": false,
    "use_audio": true,
    "use_label": false,
    "use_one_hot": false,
    "data_augment": false,
    "align_mel_duration": false,
    "f0_min": 50,
    "f0_max": 1100,
    "pitch_bin": 256,
    "pitch_max": 1100.0,
    "pitch_min": 50.0,
    "cut_mel_frame": 32,
    "use_min_max_norm_mel": false
  },
  "train": {
    "ddp": false,
    "random_seed": 970227,
    "batch_size": 16,
    "epochs": 50000,
    "max_steps": 1000000,
    "total_training_steps": 50000,
    "save_summary_steps": 500,
    "save_checkpoints_steps": 10000,
    "valid_interval": 10000,
    "keep_checkpoint_max": 15,
    "multi_speaker_training": false,
    "adamw": {
      "lr": 0.0002,
      "adam_b1": 0.8,
      "adam_b2": 0.99
    },
    "exponential_lr": {
      "lr_decay": 0.999
    },
    "criterions": [
      "feature",
      "discriminator",
      "generator",
      "mel",
      "wav"
    ]
  },
  "model_type": "GANVocoder",
  "model": {
    "generator": "hifigan",
    "discriminators": [
      "msd",
      "mpd",
      "msstftd",
      "mscqtd"
    ],
    "hifigan": {
      "resblock": "2",
      "upsample_rates": [8, 8, 4],
      "upsample_kernel_sizes": [16, 16, 8],
      "upsample_initial_channel": 256,
      "resblock_kernel_sizes": [3, 5, 7],
      "resblock_dilation_sizes": [
        [1, 2],
        [2, 6],
        [3, 12]
      ]
    },
    "mpd": {
      "mpd_reshapes": [2, 3, 5, 7, 11],
      "use_spectral_norm": false,
      "discriminator_channel_multi": 1
    },
    "msstftd": {
      "filters": 32
    },
    "mscqtd": {
      "hop_lengths": [512, 256, 256],
      "filters": 32,
      "max_filters": 1024,
      "filters_scale": 1,
      "dilations": [1, 2, 4],
      "in_channels": 1,
      "out_channels": 1,
      "n_octaves": [9, 9, 9],
      "bins_per_octaves": [24, 36, 48]
    }
  },
  "exp_name": "hifigan"
}