Louise-Belcher
/
Finetune Model for WAV 27db Sample Rate 22050
/finetune_archived_230424-020826
/train_louise_230424-020648.log
23-04-24 02:06:48.155 - INFO: name: louise | |
model: extensibletrainer | |
scale: 1 | |
gpu_ids: [0] | |
start_step: 0 | |
checkpointing_enabled: True | |
fp16: True | |
bitsandbytes: True | |
gpus: 1 | |
datasets:[ | |
train:[ | |
name: training | |
n_workers: 2 | |
batch_size: 34 | |
mode: paired_voice_audio | |
path: ./training/louise/train.txt | |
fetcher_mode: ['lj'] | |
phase: train | |
max_wav_length: 255995 | |
max_text_length: 200 | |
sample_rate: 22050 | |
load_conditioning: True | |
num_conditioning_candidates: 2 | |
conditioning_length: 44000 | |
use_bpe_tokenizer: True | |
tokenizer_vocab: ./modules/tortoise-tts/tortoise/data/tokenizer.json | |
load_aligned_codes: False | |
data_type: img | |
] | |
val:[ | |
name: validation | |
n_workers: 2 | |
batch_size: 8 | |
mode: paired_voice_audio | |
path: ./training/louise/validation.txt | |
fetcher_mode: ['lj'] | |
phase: val | |
max_wav_length: 255995 | |
max_text_length: 200 | |
sample_rate: 22050 | |
load_conditioning: True | |
num_conditioning_candidates: 2 | |
conditioning_length: 44000 | |
use_bpe_tokenizer: True | |
tokenizer_vocab: ./modules/tortoise-tts/tortoise/data/tokenizer.json | |
load_aligned_codes: False | |
data_type: img | |
] | |
] | |
steps:[ | |
gpt_train:[ | |
training: gpt | |
loss_log_buffer: 500 | |
optimizer: adamw | |
optimizer_params:[ | |
lr: 1e-05 | |
weight_decay: 0.01 | |
beta1: 0.9 | |
beta2: 0.96 | |
] | |
clip_grad_eps: 4 | |
injectors:[ | |
paired_to_mel:[ | |
type: torch_mel_spectrogram | |
mel_norm_file: ./modules/tortoise-tts/tortoise/data/mel_norms.pth | |
in: wav | |
out: paired_mel | |
] | |
paired_cond_to_mel:[ | |
type: for_each | |
subtype: torch_mel_spectrogram | |
mel_norm_file: ./modules/tortoise-tts/tortoise/data/mel_norms.pth | |
in: conditioning | |
out: paired_conditioning_mel | |
] | |
to_codes:[ | |
type: discrete_token | |
in: paired_mel | |
out: paired_mel_codes | |
dvae_config: ./models/tortoise/train_diffusion_vocoder_22k_level.yml | |
] | |
paired_fwd_text:[ | |
type: generator | |
generator: gpt | |
in: ['paired_conditioning_mel', 'padded_text', 'text_lengths', 'paired_mel_codes', 'wav_lengths'] | |
out: ['loss_text_ce', 'loss_mel_ce', 'logits'] | |
] | |
] | |
losses:[ | |
text_ce:[ | |
type: direct | |
weight: 0.01 | |
key: loss_text_ce | |
] | |
mel_ce:[ | |
type: direct | |
weight: 1 | |
key: loss_mel_ce | |
] | |
] | |
] | |
] | |
networks:[ | |
gpt:[ | |
type: generator | |
which_model_G: unified_voice2 | |
kwargs:[ | |
layers: 30 | |
model_dim: 1024 | |
heads: 16 | |
max_text_tokens: 402 | |
max_mel_tokens: 604 | |
max_conditioning_inputs: 2 | |
mel_length_compression: 1024 | |
number_text_tokens: 256 | |
number_mel_codes: 8194 | |
start_mel_token: 8192 | |
stop_mel_token: 8193 | |
start_text_token: 255 | |
train_solo_embeddings: False | |
use_mel_codes_as_input: True | |
checkpointing: True | |
tortoise_compat: True | |
] | |
] | |
] | |
path:[ | |
strict_load: True | |
pretrain_model_gpt: ./models/tortoise/autoregressive.pth | |
root: ./ | |
experiments_root: ./training/louise/finetune | |
models: ./training/louise/finetune/models | |
training_state: ./training/louise/finetune/training_state | |
log: ./training/louise/finetune | |
val_images: ./training/louise/finetune/val_images | |
] | |
train:[ | |
niter: 4700 | |
warmup_iter: -1 | |
mega_batch_factor: 4 | |
val_freq: 100 | |
ema_enabled: False | |
default_lr_scheme: MultiStepLR | |
gen_lr_steps: [2, 4, 9, 18, 25, 33, 50, 59] | |
lr_gamma: 0.5 | |
] | |
eval:[ | |
pure: False | |
output_state: gen | |
] | |
logger:[ | |
save_checkpoint_freq: 100 | |
visuals: ['gen', 'mel'] | |
visual_debug_rate: 900 | |
is_mel_spectrogram: True | |
] | |
is_train: True | |
dist: False | |
23-04-24 02:06:48.156 - INFO: Random seed: 9424 | |
23-04-24 02:06:48.891 - INFO: Number of training data elements: 293, iters: 9 | |
23-04-24 02:06:48.891 - INFO: Total epochs needed: 523 for iters 4,700 | |
23-04-24 02:06:56.504 - INFO: Loading model for [./models/tortoise/autoregressive.pth] | |
23-04-24 02:07:00.586 - INFO: Start training from epoch: 0, iter: 0 | |