File size: 2,299 Bytes
a086629
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

# Training data source: file list, tokenizer language, and audio / mel settings.
dataset:
  seed: 42
  # Shuffle the input list of text/audio entries.
  shuffle: true
  filename: '/home/syl20/data/en/LJSpeech/LJSpeech-1.1/ljs_audio_text_train_filelist.txt'
  tokenizer:
    lang: 'en'
  audio:
    # 32768 = 2^15; presumably the 16-bit PCM full-scale value used to
    # normalize waveforms -- TODO confirm against the audio loader.
    max_wav_value: 32768.0
    sampling_rate: 22050
  mel_spectrogram:
    # NOTE(review): duplicates audio.sampling_rate above; presumably the two
    # values must stay equal.
    sampling_rate: 22050
    n_fft: 1024
    hop_length: 256
    win_length: 1024
    n_mels: 80
    # Mel filterbank frequency bounds (presumably in Hz -- confirm).
    f_min: 0.0
    f_max: 8000.0

# Top-level loader options. A second, separate set of loader options is
# defined under datamodule.dataloader.
dataloader:
  batch_size: 208
  num_workers: 1
  shuffle: true
  pin_memory: false
  drop_last: true

# Data-module configuration: dataset location and splits, loader options, and
# the tokenizer / mel-spectrogram objects identified by `_target_` class paths
# (presumably instantiated Hydra-style -- confirm against the consumer).
datamodule:
  dataset:
    data_dir: '/home/syl20/data/en/LJSpeech'
    folder_in_archive: 'wavs'
    url: '.'
    download: false
    seed: 42
    # Split fractions; 0.7 + 0.295 + 0.005 = 1.0.
    train_split: 0.7
    test_split: 0.295
    val_split: 0.005

  dataloader:
    batch_size: 208
    num_workers: 1
    pin_memory: false
    shuffle: false

  tokenizer:
    _target_: maui.data.text.tokenizers.Taco2Tokenizer
    lang: 'en'

  mel_spectrogram:
    _target_: maui.data.audio.stft.MelSpecgram
    # Same numeric values as dataset.mel_spectrogram.
    n_fft: 1024
    hop_length: 256
    win_length: 1024
    n_mels: 80
    sampling_rate: 22050
    f_min: 0.0
    f_max: 8000.0

# Model hyperparameters, grouped by sub-module.
model:
  mask_padding: true
  text_embedder:
    # len(symbols)
    n_symbols: 148
    symbols_embedding_dim: 512

  encoder:
    # Same value (512) as text_embedder.symbols_embedding_dim.
    encoder_embedding_dim: 512
    encoder_kernel_size: 5
    encoder_n_convolutions: 3

  decoder:
    # Same value (80) as the n_mels settings in the mel_spectrogram sections.
    n_mel_channels: 80
    n_frames_per_step: 1
    # Same value (512) as encoder.encoder_embedding_dim.
    encoder_embedding_dim: 512
    attention_rnn_dim: 1024
    decoder_rnn_dim: 1024
    prenet_dim: 256
    max_decoder_steps: 1000
    gate_threshold: 0.5
    p_attention_dropout: 0.1
    p_decoder_dropout: 0.1
    attention_dim: 128
    attention_location_n_filters: 32
    attention_location_kernel_size: 31

  postnet:
    n_mel_channels: 80
    postnet_embedding_dim: 512
    postnet_kernel_size: 5
    postnet_n_convolutions: 5

# Multi-GPU process-group settings.
distributed:
  dist_backend: 'nccl'
  dist_url: 'tcp://localhost:54321'
  n_gpus: 8
  rank: 0
  group_name: 'group_name'

# Optimisation, checkpointing and logging options for the training loop.
training:
  # Don't forget to call maui.utils.multiproc when running distributed.
  distributed_run: true
  fp16: true
  cudnn_enabled: true
  cudnn_benchmark: false
  # Written with an explicit decimal point: a bare `1e-3` does not match the
  # YAML 1.1 float pattern, so plain PyYAML would load it as the string '1e-3'.
  learning_rate: 1.0e-3
  use_saved_learning_rate: false
  # Explicit decimal point for the same reason as learning_rate.
  weight_decay: 1.0e-6
  grad_clip_thresh: 1.0
  ignore_layers: ['embedding.weight']
  epochs: 15000
  iters_per_checkpoint: 250
  wandb: true
  wandb_entity: slegroux