---
# Training configuration for the `train_voice_voice_clip` run.
#
# NOTE(review): this file was recovered from a copy in which every line had
# lost its indentation and carried a trailing " |". The nesting below was
# reconstructed from the key names and section order; confirm it against the
# schema the trainer expects before relying on it.

# Run-level settings.
name: train_voice_voice_clip
use_tb_logger: true
gpu_ids: [0]
start_step: 0
fp16: false
checkpointing_enabled: true
wandb: false

# Data sources. `train` and `val` share the same loader mode, sampling rate,
# padding length and minimum length; they differ in paths, worker count and
# the train-only `exclusions` list.
datasets:
  train:
    name: clips
    n_workers: 4
    batch_size: 512
    mode: unsupervised_audio
    path:
      - /y/clips
      - /y/bigasr_dataset/libritts/train-clean-100
      - /y/bigasr_dataset/libritts/train-clean-360
      - /y/bigasr_dataset/libritts/train-other-500
      - /y/bigasr_dataset/ljspeech/wavs
    exclusions:
      - /y/clips/books1-hifreq.txt
      - /y/clips/podcasts-0-hifreq.txt
      - /y/clips/books2-hifreq.txt
      - /y/bigasr_dataset/libritts-hifreq.txt
    cache_path: /y/clips-cache-hifreq.pth
    sampling_rate: 22050
    do_augmentation: false
    pad_to_samples: 80000
    resample_clip: false
    min_length: 40000
    debug_loading_failures: false
  val:
    name: clips_val
    n_workers: 1
    batch_size: 512
    mode: unsupervised_audio
    path: [/h/libritts/test-clean]
    cache_path: /h/libritts/test-clean/cache.pth
    sampling_rate: 22050
    do_augmentation: false
    pad_to_samples: 80000
    resample_clip: false
    min_length: 40000
    debug_loading_failures: false

# Model definitions. The key `clip` is the name referenced by
# `steps.clip_train.training` and by the `forward` injector's `generator`.
networks:
  clip:
    type: generator
    which_model_G: voice_to_voice_clip
    kwargs:
      encoder_output: 512

# Checkpoint loading options.
# NOTE(review): `pretrain_model_clip` presumably names the weights loaded into
# the `clip` network — confirm the path is resolved the way you expect.
path:
  strict_load: true
  pretrain_model_clip: voice_voice_clip.pth

# Training steps. There is a single step, `clip_train`, which trains `clip`.
steps:
  clip_train:
    training: clip
    loss_log_buffer: 250

    optimizer: adamw
    optimizer_params:
      # Explicit tag: some YAML parsers read a bare `1e-4` as a string.
      lr: !!float 1e-4
      weight_decay: 0
      beta1: 0.9
      beta2: 0.99
    # NOTE(review): placed at step level rather than inside
    # `optimizer_params`; the flattened source did not disambiguate — confirm.
    clip_grad_eps: 4

    # Injectors are chained through their `in`/`out` names:
    # `clip` -> speech_to_mel -> `speech_mel` -> forward -> `clip_loss`.
    injectors:
      speech_to_mel:
        type: torch_mel_spectrogram
        mel_norm_file: ../experiments/clips_mel_norms.pth
        in: clip
        out: speech_mel
      forward:
        type: generator
        generator: clip
        in: [speech_mel, clip_lengths]
        out: clip_loss

    # The single loss reads the `clip_loss` value produced by `forward`.
    losses:
      clip_loss_ce:
        type: direct
        weight: 1
        key: clip_loss

# Schedule and learning-rate settings.
train:
  niter: 500000
  warmup_iter: -1
  mega_batch_factor: 1
  ema_rate: 0.999
  val_freq: 500

  default_lr_scheme: MultiStepLR
  gen_lr_steps: [20000, 40000, 60000]
  lr_gamma: 0.2
  warmup_steps: 1000

eval:
  pure: true

# Logging and checkpoint cadence.
logger:
  print_freq: 10
  save_checkpoint_freq: 500
  visuals: []
  is_mel_spectrogram: true
  visual_debug_rate: 100