# Training configuration with five top-level sections:
# dataset, datamodule, model, distributed and training.
#
# NOTE(review): the block nesting below was rebuilt from key order and from
# the rule that sibling keys must be unique (`tokenizer`, `mel_spectrogram`
# and `dataloader` each occur twice, so they cannot all share one level).
# Confirm the placement against the code that reads this file.

dataset:
  seed: 42
  shuffle: true  # shuffle input list of text/audio
  filename: '/home/syl20/data/en/LJSpeech/LJSpeech-1.1/ljs_audio_text_train_filelist.txt'
  tokenizer:
    lang: 'en'
  audio:
    max_wav_value: 32768.0
    sampling_rate: 22050
  # Holds the same values as datamodule.mel_spectrogram further down.
  mel_spectrogram:
    sampling_rate: 22050
    n_fft: 1024
    hop_length: 256
    win_length: 1024
    n_mels: 80
    f_min: 0.0
    f_max: 8000.0
  dataloader:
    batch_size: 208
    num_workers: 1
    shuffle: true
    pin_memory: false
    drop_last: true

datamodule:
  dataset:
    data_dir: '/home/syl20/data/en/LJSpeech'
    folder_in_archive: 'wavs'
    url: '.'
    download: false
    # NOTE(review): `seed` and the three split ratios are kept under
    # `datamodule.dataset`; they may instead belong directly under
    # `datamodule` - verify against the consumer.
    seed: 42
    # The three ratios below add up to 1.0.
    train_split: 0.7
    test_split: 0.295
    val_split: 0.005
  dataloader:
    batch_size: 208
    num_workers: 1
    pin_memory: false
    shuffle: false
  tokenizer:
    _target_: maui.data.text.tokenizers.Taco2Tokenizer
    lang: 'en'
  # Holds the same values as dataset.mel_spectrogram above.
  mel_spectrogram:
    _target_: maui.data.audio.stft.MelSpecgram
    n_fft: 1024
    hop_length: 256
    win_length: 1024
    n_mels: 80
    sampling_rate: 22050
    f_min: 0.0
    f_max: 8000.0

model:
  mask_padding: true
  text_embedder:
    n_symbols: 148  # len(symbols)
    symbols_embedding_dim: 512
  encoder:
    encoder_embedding_dim: 512
    encoder_kernel_size: 5
    encoder_n_convolutions: 3
  decoder:
    n_mel_channels: 80
    n_frames_per_step: 1
    encoder_embedding_dim: 512
    attention_rnn_dim: 1024
    decoder_rnn_dim: 1024
    prenet_dim: 256
    max_decoder_steps: 1000
    gate_threshold: 0.5
    p_attention_dropout: 0.1
    p_decoder_dropout: 0.1
    attention_dim: 128
    attention_location_n_filters: 32
    attention_location_kernel_size: 31
  postnet:
    n_mel_channels: 80
    postnet_embedding_dim: 512
    postnet_kernel_size: 5
    postnet_n_convolutions: 5

distributed:
  dist_backend: 'nccl'
  dist_url: 'tcp://localhost:54321'
  n_gpus: 8
  rank: 0
  group_name: 'group_name'

training:
  # don't forget to call maui.utils.multiproc when running distributed
  distributed_run: true
  fp16: true
  cudnn_enabled: true
  cudnn_benchmark: false
  # Exponent floats carry an explicit mantissa dot: YAML 1.1 loaders such as
  # PyYAML resolve a bare `1e-3` as a string rather than a float.
  learning_rate: 1.0e-3
  use_saved_learning_rate: false
  weight_decay: 1.0e-6
  grad_clip_thresh: 1.0
  ignore_layers: ['embedding.weight']
  epochs: 15000
  iters_per_checkpoint: 250
  wandb: true
  wandb_entity: slegroux