{
  "base_config": "egs/tta/autoencoderkl/exp_config_base.json",
  "dataset": [
    "AudioCaps"
  ],
  "preprocess": {
    "processed_dir": "data",
    "use_spkid": false,
    "use_uv": false,
    "use_frame_pitch": false,
    "use_phone_pitch": false,
    "use_frame_energy": false,
    "use_phone_energy": false,
    "use_mel": false,
    "use_audio": false,
    "use_label": false,
    "use_one_hot": false,
    "use_caption": true,
    "use_melspec": true,
    "use_wav": false,
    "melspec_dir": "mel",
    "wav_dir": "wav"
  },
  "log_dir": "ckpts/tta",
  "model": {
    "autoencoderkl": {
      "ch": 128,
      "ch_mult": [1,2,2,4],
      "num_res_blocks": 2,
      "in_channels": 1,
      "z_channels": 4,
      "out_ch": 1,
      "double_z": true
    }
  },
  "train": {
    "adam": {
      "lr": 4.0e-5
    },
    "ddp": false,
    "random_seed": 12345,
    "batch_size": 12,
    "epochs": 50000,
    "max_steps": 1000000,
    "total_training_steps": 800000,
    "save_summary_steps": 1000,
    "save_checkpoints_steps": 5000,
    "valid_interval": 5000,
    "keep_checkpoint_max": 100
  }
}