// Experiment configuration for multi-speaker VITS training on the "hifitts" dataset.
// Keys not set here are expected to come from the file named in "base_config".
{
    "base_config": "config/vits.json",
    "model_type": "VITS",
    "dataset": [
        "hifitts"
    ],
    "dataset_path": {
        // TODO: Fill in your dataset path
        "hifitts": "/mnt/workspace/xueliumeng/data/hifitts/hi_fi_tts_v0"
    },
    // TODO: Fill in the output log path. The default value is "Amphion/ckpts/tts"
    "log_dir": "/mnt/workspace/xueliumeng/data/vits_on_libritts_hifitts/logs",
    "preprocess": {
        "extract_audio": true,
        "use_phone": true,
        // linguistic features
        "extract_phone": true,
        "phone_extractor": "espeak", // "espeak, pypinyin, pypinyin_initials_finals, lexicon (only for language=en-us right now)"
        // TODO: Fill in the output data path. The default value is "Amphion/data"
        "processed_dir": "/mnt/workspace/xueliumeng/data/vits_on_libritts_hifitts/processed_data",
        "sample_rate": 24000,
        "train_file": "train_1w.json",
        "valid_file": "valid.json", // validation set
        "use_spkid": true // True: use speaker id for multi-speaker dataset
    },
    "train": {
        "batch_size": 16,
        "multi_speaker_training": true, // True: train multi-speaker model; False: train single-speaker model
        "n_speakers": 2500 // number of speakers; will be set automatically if n_speakers is 0 and multi_speaker_training is true
    }
}