slegroux committed on
Commit
a086629
1 Parent(s): 731ad3d

obama model 12h

Browse files
.gitattributes CHANGED
@@ -29,3 +29,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ obama filter=lfs diff=lfs merge=lfs -text
33
+ hifigan filter=lfs diff=lfs merge=lfs -text
hifigan/UNIVERSAL_V1/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1bd98e99062ddbced38729a5252dc2aa772328d16d70097ac139dab2f269dc9
3
+ size 799
hifigan/UNIVERSAL_V1/g_02500000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771eaf4876485a35e25577563d390c262e23c2421e4a8c929eacfde34a5b7a60
3
+ size 55788858
maui-tacotron2.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ dataset:
3
+ seed: 42
4
+ shuffle: True # shuffle input list of text/audio
5
+ filename: '/home/syl20/data/en/LJSpeech/LJSpeech-1.1/ljs_audio_text_train_filelist.txt'
6
+ tokenizer:
7
+ lang: 'en'
8
+ audio:
9
+ max_wav_value: 32768.0
10
+ sampling_rate: 22050
11
+ mel_spectrogram:
12
+ sampling_rate: 22050
13
+ n_fft: 1024
14
+ hop_length: 256
15
+ win_length: 1024
16
+ n_mels: 80
17
+ f_min: 0.0
18
+ f_max: 8000.0
19
+
20
+ dataloader:
21
+ batch_size: 208
22
+ num_workers: 1
23
+ shuffle: True
24
+ pin_memory: False
25
+ drop_last: True
26
+
27
+ datamodule:
28
+ dataset:
29
+ data_dir: '/home/syl20/data/en/LJSpeech'
30
+ folder_in_archive: 'wavs'
31
+ url: '.'
32
+ download: False
33
+ seed: 42
34
+ train_split: 0.7
35
+ test_split: 0.295
36
+ val_split: 0.005
37
+
38
+ dataloader:
39
+ batch_size: 208
40
+ num_workers: 1
41
+ pin_memory: False
42
+ shuffle: False
43
+
44
+ tokenizer:
45
+ _target_: maui.data.text.tokenizers.Taco2Tokenizer
46
+ lang: 'en'
47
+
48
+ mel_spectrogram:
49
+ _target_: maui.data.audio.stft.MelSpecgram
50
+ n_fft: 1024
51
+ hop_length: 256
52
+ win_length: 1024
53
+ n_mels: 80
54
+ sampling_rate: 22050
55
+ f_min: 0.0
56
+ f_max: 8000.0
57
+
58
+ model:
59
+ mask_padding: True
60
+ text_embedder:
61
+ n_symbols: 148 #len(symbols)
62
+ symbols_embedding_dim: 512
63
+
64
+ encoder:
65
+ encoder_embedding_dim: 512
66
+ encoder_kernel_size: 5
67
+ encoder_n_convolutions: 3
68
+
69
+ decoder:
70
+ n_mel_channels: 80
71
+ n_frames_per_step: 1
72
+ encoder_embedding_dim: 512
73
+ attention_rnn_dim: 1024
74
+ decoder_rnn_dim: 1024
75
+ prenet_dim: 256
76
+ max_decoder_steps: 1000
77
+ gate_threshold: 0.5
78
+ p_attention_dropout: 0.1
79
+ p_decoder_dropout: 0.1
80
+ attention_dim: 128
81
+ attention_location_n_filters: 32
82
+ attention_location_kernel_size: 31
83
+
84
+ postnet:
85
+ n_mel_channels: 80
86
+ postnet_embedding_dim: 512
87
+ postnet_kernel_size: 5
88
+ postnet_n_convolutions: 5
89
+
90
+ distributed:
91
+ dist_backend: "nccl"
92
+ dist_url: "tcp://localhost:54321"
93
+ n_gpus: 8
94
+ rank: 0
95
+ group_name: "group_name"
96
+
97
+ training:
98
+ distributed_run: True # don't forget to call maui.utils.multiproc when running distributed
99
+ fp16: True
100
+ cudnn_enabled: True
101
+ cudnn_benchmark: False
102
+ learning_rate: 1e-3
103
+ use_saved_learning_rate: False
104
+ weight_decay: 1e-6
105
+ grad_clip_thresh: 1.0
106
+ ignore_layers: ['embedding.weight']
107
+ epochs: 15000
108
+ iters_per_checkpoint: 250
109
+ wandb: True
110
+ wandb_entity: slegroux
obama/checkpoint_9000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b2e3659414058384c3226d7a7754014e7f3d51da0dec0dcaa077f1184d485d3
3
+ size 338417883