arampacha committed
Commit b9c8a7f
1 Parent(s): b860ae9
Files changed (8):
  1. README.md +11 -1
  2. config.yaml +33 -0
  3. gcmvn_stats.npz +0 -0
  4. hifigan.bin +3 -0
  5. hifigan.json +37 -0
  6. pytorch_model.pt +3 -0
  7. speakers.txt +4 -0
  8. vocab.txt +70 -0
README.md CHANGED
@@ -1,3 +1,13 @@
  ---
- license: gpl-3.0
+ library_name: fairseq
+ task: text-to-speech
+ tags:
+ - fairseq
+ - audio
+ - text-to-speech
+ language: en
+ widget:
+ - text: "Hello, this is a test run."
+   example_title: "Hello, this is a test run."
  ---
+ # fastspeech2-mf4
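The new README metadata marks this as a fairseq text-to-speech checkpoint. A minimal loading sketch via fairseq's TTS hub interface, assuming the repo id is `arampacha/fastspeech2-mf4` (inferred from the committer and the README heading, not stated in the diff):

```python
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface

# Assumed repo id; the vocoder override points at the HiFi-GAN files added below.
models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
    "arampacha/fastspeech2-mf4",
    arg_overrides={"vocoder": "hifigan", "fp16": False},
)
model = models[0]
TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
generator = task.build_generator(models, cfg)

text = "Hello, this is a test run."  # the widget example from the README
sample = TTSHubInterface.get_model_input(task, text)
wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
# wav: 1-D waveform tensor, rate: 22050 Hz (per config.yaml)
```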
config.yaml ADDED
@@ -0,0 +1,33 @@
+ audio_root: ./
+ features:
+   energy_max: 5.850490093231201
+   energy_min: 0.0
+   eps: 1.0e-05
+   f_max: 8000
+   f_min: 0
+   hop_len_t: 0.011609977324263039
+   hop_length: 256
+   n_fft: 1024
+   n_mels: 80
+   n_stft: 513
+   pitch_max: 6.524898211542482
+   pitch_min: 1.0e-08
+   sample_rate: 22050
+   type: spectrogram+melscale+log
+   win_len_t: 0.046439909297052155
+   win_length: 1024
+   window_fn: hann
+ global_cmvn:
+   stats_npz_path: gcmvn_stats.npz
+ sample_rate: 22050
+ transforms:
+   '*':
+   - global_cmvn
+ vocab_filename: vocab.txt
+ speaker_set_filename: speakers.txt
+ vocoder:
+   type: hifigan
+   config: hifigan.json
+   checkpoint: hifigan.bin
+ hub:
+   phonemizer: g2p
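The `features` block in config.yaml describes the log-mel front end, and its derived fields follow directly from the hop/window lengths and the 22050 Hz sample rate. A small sanity-check sketch (assumes PyYAML and the nesting shown above):

```python
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

feat = cfg["features"]
sr = feat["sample_rate"]
assert abs(feat["hop_len_t"] - feat["hop_length"] / sr) < 1e-9   # 256 / 22050
assert abs(feat["win_len_t"] - feat["win_length"] / sr) < 1e-9   # 1024 / 22050
assert feat["n_stft"] == feat["n_fft"] // 2 + 1                  # 513 = 1024 // 2 + 1
print(feat["type"])  # spectrogram+melscale+log -> 80-band log-mel features
```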
gcmvn_stats.npz ADDED
Binary file (1.14 kB).
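These are the statistics referenced by the `global_cmvn` transform in config.yaml. A sketch of how fairseq-style global CMVN would apply them, assuming the archive stores `mean` and `std` arrays over the 80 mel bins (fairseq's convention, not verifiable from the diff alone):

```python
import numpy as np

stats = np.load("gcmvn_stats.npz")
mean, std = stats["mean"], stats["std"]  # assumed keys, shape (80,)

def apply_gcmvn(log_mel):
    """Normalize a (n_frames, 80) log-mel matrix per mel bin."""
    return (log_mel - mean) / std
```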
 
hifigan.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc43f2a7cff67b91696c67cb577391be2ed64cd9bc10aedcc9e08bb7c0b03e44
+ size 55819885
hifigan.json ADDED
@@ -0,0 +1,37 @@
+ {
+     "resblock": "1",
+     "num_gpus": 0,
+     "batch_size": 16,
+     "learning_rate": 0.0002,
+     "adam_b1": 0.8,
+     "adam_b2": 0.99,
+     "lr_decay": 0.999,
+     "seed": 1234,
+
+     "upsample_rates": [8,8,2,2],
+     "upsample_kernel_sizes": [16,16,4,4],
+     "upsample_initial_channel": 512,
+     "resblock_kernel_sizes": [3,7,11],
+     "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+
+     "segment_size": 8192,
+     "num_mels": 80,
+     "num_freq": 1025,
+     "n_fft": 1024,
+     "hop_size": 256,
+     "win_size": 1024,
+
+     "sampling_rate": 22050,
+
+     "fmin": 0,
+     "fmax": 8000,
+     "fmax_for_loss": null,
+
+     "num_workers": 4,
+
+     "dist_config": {
+         "dist_backend": "nccl",
+         "dist_url": "tcp://localhost:54321",
+         "world_size": 1
+     }
+ }
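The vocoder config has to line up with the acoustic features: HiFi-GAN upsamples one mel frame back to `hop_size` samples, so the product of `upsample_rates` must equal the hop size, and the mel settings must match config.yaml. A quick check:

```python
import json
from math import prod

with open("hifigan.json") as f:
    h = json.load(f)

assert prod(h["upsample_rates"]) == h["hop_size"] == 256   # 8 * 8 * 2 * 2
assert h["num_mels"] == 80 and h["sampling_rate"] == 22050
assert (h["fmin"], h["fmax"]) == (0, 8000)                  # same band limits as config.yaml
```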
pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fea0fd38660ca493154d720cbf2a111b5a7ba135e59da4c4f5d44841333b604a
+ size 387441213
speakers.txt ADDED
@@ -0,0 +1,4 @@
+ Freeman angry
+ Freeman happy
+ Freeman narration
+ Freeman normal
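These four entries form the speaker set referenced by `speaker_set_filename` in config.yaml. A sketch of mapping a speaker name to its integer id, assuming (as in fairseq's multi-speaker TTS setup) that the embedding index is simply the line position in speakers.txt:

```python
with open("speakers.txt") as f:
    speakers = [line.strip() for line in f if line.strip()]

spk_to_id = {name: i for i, name in enumerate(speakers)}
print(spk_to_id)
# {'Freeman angry': 0, 'Freeman happy': 1, 'Freeman narration': 2, 'Freeman normal': 3}
```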
vocab.txt ADDED
@@ -0,0 +1,70 @@
+ N 333705
+ T 328991
+ AH0 293550
+ S 266464
+ R 228299
+ L 206080
+ D 205594
+ IH0 182689
+ K 164159
+ sp 142640
+ M 138027
+ Z 122502
+ IH1 116897
+ AA1 113508
+ TH 113159
+ F 112260
+ EH1 104541
+ P 101009
+ W 93292
+ ER0 89396
+ B 87421
+ IY0 83572
+ AE1 80417
+ HH 77016
+ EY1 76588
+ AO1 75558
+ AY1 70751
+ AH1 63903
+ V 58163
+ UW1 54663
+ IY1 51596
+ NG 49090
+ OW0 47402
+ G 44006
+ Y 41567
+ SH 41278
+ DH 40317
+ OW1 37952
+ AE2 36196
+ ER1 30813
+ JH 29213
+ CH 24021
+ AW1 20297
+ UH1 18645
+ EH2 12445
+ EY2 10405
+ IH2 8282
+ OW2 7128
+ AY2 7050
+ UW0 6859
+ AA2 5578
+ OY1 5382
+ AA0 5067
+ AO2 4570
+ AW2 4526
+ AO0 3906
+ EH0 3684
+ AE0 3635
+ IY2 3150
+ ZH 3126
+ UW2 3107
+ AY0 2345
+ AH2 2238
+ UH2 1020
+ ER2 973
+ EY0 816
+ AW0 558
+ UH0 295
+ OY2 266
+ OY0 84
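vocab.txt lists the model's ARPABET phone inventory (plus the pause token `sp`) with corpus counts, matching `hub: phonemizer: g2p` in config.yaml. A phonemization sketch using the g2p_en package; punctuation and word-boundary handling are simplified here and may differ from fairseq's actual preprocessing:

```python
from g2p_en import G2p

with open("vocab.txt") as f:
    vocab = {line.split()[0] for line in f if line.strip()}

g2p = G2p()
# Keep only tokens that appear in the model's vocabulary (drops spaces/punctuation).
tokens = [t for t in g2p("Hello, this is a test run.") if t in vocab]
print(tokens)  # e.g. ['HH', 'AH0', 'L', 'OW1', 'DH', 'IH1', 'S', ...]
```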